!pip install openpyxl
Requirement already satisfied: openpyxl in c:\users\hp\appdata\local\anaconda3\lib\site-packages (3.1.5) Requirement already satisfied: et-xmlfile in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from openpyxl) (1.1.0)
Week 3 Started from here
import pandas as pd
import os
from pathlib import Path
import re
# Define folder paths
folders = {
"ReviewVs": r"C:\Users\hp\Desktop\WEEK 3_DSHub\Reviewvs",
"Stats_Crashes": r"C:\Users\hp\Desktop\WEEK 3_DSHub\Stats_Crashes",
"Stats_Install": r"C:\Users\hp\Desktop\WEEK 3_DSHub\Stats_Install",
"Stats_Ratings": r"C:\Users\hp\Desktop\WEEK 3_DSHub\Stats_Ratings",
"Supported_devices": r"C:\Users\hp\Desktop\WEEK 3_DSHub\Supported_devices"
}
# Dictionary to store merged dataframes
merged_dataframes = {}
# Process each folder
for folder_name, folder_path in folders.items():
print(f"\n{'='*60}")
print(f"Processing: {folder_name}")
print(f"{'='*60}")
# Check if folder exists
if not os.path.exists(folder_path):
print(f" Folder not found: {folder_path}")
continue
# Get all CSV files in the folder
csv_files = [f for f in os.listdir(folder_path) if f.endswith('.csv')]
if not csv_files:
print(f" No CSV files found in {folder_name}")
continue
print(f"Found {len(csv_files)} CSV file(s)")
# Read and clean each CSV file
dataframes = []
for csv_file in csv_files:
file_path = os.path.join(folder_path, csv_file)
try:
# Try reading with Python engine and error handling
try:
df = pd.read_csv(file_path, encoding='utf-8', engine='python', on_bad_lines='skip', quoting=3)
except:
try:
df = pd.read_csv(file_path, encoding='latin-1', engine='python', on_bad_lines='skip', quoting=3)
except:
# Last resort: read with maximum flexibility
df = pd.read_csv(file_path, encoding='utf-8', engine='python', on_bad_lines='skip')
# Remove completely empty rows
df = df.dropna(how='all')
# Clean column names (strip whitespace)
df.columns = df.columns.str.strip()
# Handle any comma-separated values in cells
df = df.apply(lambda x: x.str.strip() if x.dtype == "object" else x)
if len(df) > 0:
print(f" ✓ Loaded: {csv_file} ({len(df)} rows, {len(df.columns)} columns)")
dataframes.append(df)
else:
print(f" ⚠ Skipped: {csv_file} (empty after cleaning)")
except Exception as e:
print(f" ✗ Error processing {csv_file}: {str(e)}")
# Merge all dataframes in the folder
if dataframes:
if len(dataframes) == 1:
merged_df = dataframes[0]
else:
# Merge on index to combine all rows (concatenate)
merged_df = pd.concat(dataframes, ignore_index=True, sort=False)
# Clean the merged dataframe immediately
merged_df = clean_dataframe(merged_df)
merged_dataframes[folder_name] = merged_df
print(f"\n✓ Merged data: {len(merged_df)} total rows, {len(merged_df.columns)} columns")
print(f"Preview of {folder_name}:")
print(merged_df.head())
else:
print(f"\n✗ No data to merge for {folder_name}")
# Export all merged dataframes to Excel
output_path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged_Output.xlsx"
print(f"\n{'='*60}")
print("Exporting to Excel...")
print(f"{'='*60}")
def clean_text(x):
if isinstance(x, str):
# Remove byte order marks (BOM) and invisible characters
x = x.replace("\ufeff", "").replace("ÿþ", "")
# Remove control and illegal Excel characters
x = re.sub(r"[\x00-\x1F\x7F-\x9F]", "", x)
# Remove any remaining non-printable Unicode
x = ''.join(ch for ch in x if ch.isprintable())
return x.strip()
return x
def clean_dataframe(df):
"""Clean dataframe using clean_text function"""
# Apply clean_text to all column names
df.columns = [clean_text(col) for col in df.columns]
# Rename problematic columns
df.columns = df.columns.str.replace('Package Name', 'Package_Name', regex=False)
df.columns = df.columns.str.replace('Package name', 'Package_Name', regex=False)
# Apply clean_text to all object columns
for col in df.columns:
if df[col].dtype == 'object':
df[col] = df[col].apply(clean_text)
return df
def get_safe_sheet_name(name, index):
"""Create a safe sheet name for Excel"""
# Excel sheet names cannot exceed 31 characters and cannot contain: \ / ? * [ ] :
safe_name = name.replace('\\', '').replace('/', '').replace('?', '').replace('*', '').replace('[', '').replace(']', '').replace(':', '')
safe_name = safe_name.strip()[:31]
return safe_name if safe_name else f"Sheet_{index}"
try:
excel_max_rows = 1048576
output_base = output_path.replace('.xlsx', '')
file_count = 0
with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
for idx, (folder_name, df) in enumerate(merged_dataframes.items(), 1):
df = clean_dataframe(df)
# Check if data exceeds Excel row limit
if len(df) > excel_max_rows:
print(f"\n ⚠ {folder_name} has {len(df)} rows (exceeds Excel limit of {excel_max_rows})")
print(f" Splitting into multiple files...")
# Split into multiple files
num_chunks = (len(df) // excel_max_rows) + 1
for chunk_num in range(num_chunks):
start_row = chunk_num * excel_max_rows
end_row = (chunk_num + 1) * excel_max_rows
chunk_df = df.iloc[start_row:end_row]
chunk_file = f"{output_base}_{folder_name}_Part{chunk_num + 1}.xlsx"
chunk_df.to_excel(chunk_file, sheet_name=f"Data_Pt{chunk_num + 1}", index=False, engine='openpyxl')
print(f" ✓ Part {chunk_num + 1}: '{chunk_file}' ({len(chunk_df)} rows)")
file_count += 1
else:
# Use safe sheet name function
sheet_name = get_safe_sheet_name(folder_name, idx)
df.to_excel(writer, sheet_name=sheet_name, index=False)
print(f" ✓ Sheet {idx}: '{sheet_name}' ({len(df)} rows)")
file_count += 1
print(f"\n✓ Success! Files exported:")
print(f" Main file: {output_path}")
if file_count > 5:
print(f" Plus {file_count - 1} additional split files for large datasets")
except Exception as e:
print(f"\n✗ Error during export: {str(e)}")
print("Attempting individual file export...")
# Fallback: Export each folder to separate file
try:
for idx, (folder_name, df) in enumerate(merged_dataframes.items(), 1):
df = clean_dataframe(df)
excel_max_rows = 1048576
if len(df) > excel_max_rows:
print(f"\n {folder_name}: {len(df)} rows - splitting...")
num_chunks = (len(df) // excel_max_rows) + 1
for chunk_num in range(num_chunks):
start_row = chunk_num * excel_max_rows
end_row = (chunk_num + 1) * excel_max_rows
chunk_df = df.iloc[start_row:end_row]
chunk_file = output_path.replace('.xlsx', f'_{folder_name}_Part{chunk_num + 1}.xlsx')
chunk_df.to_excel(chunk_file, sheet_name='Data_Pt', index=False, engine='openpyxl')
print(f" ✓ {chunk_file} ({len(chunk_df)} rows)")
else:
alt_output = output_path.replace('.xlsx', f'_{folder_name}.xlsx')
sheet_name = get_safe_sheet_name(folder_name, idx)
df.to_excel(alt_output, sheet_name=sheet_name, index=False, engine='openpyxl')
print(f" ✓ {alt_output} ({len(df)} rows)")
print(f"\n✓ Alternative export completed!")
except Exception as e2:
print(f"\n✗ Export failed: {str(e2)}")
print("Ensure openpyxl is installed: pip install openpyxl")
# Display summary
print(f"\n{'='*60}")
print("SUMMARY - 5 Merged Dataframes")
print(f"{'='*60}")
for idx, (folder_name, df) in enumerate(merged_dataframes.items(), 1):
print(f"{idx}. {folder_name}")
print(f" Shape: {df.shape[0]} rows × {df.shape[1]} columns")
print(f" Columns: {', '.join(df.columns[:5])}{'...' if len(df.columns) > 5 else ''}\n")
import pandas as pd
# === 1️⃣ Read the CSV file ===
file_path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\cleaned_Merged_Output_Stats_Ratings.csv" # 🔁 change this to your file name
df = pd.read_csv(file_path)
# === 2️⃣ Convert 'Date' column to datetime ===
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
# Drop rows without valid dates
df = df.dropna(subset=['Date'])
# === 3️⃣ Sort data by date (from July 1 → July 31, 2024) ===
df = df.sort_values(by='Date', ascending=True)
# === 4️⃣ Save as a new sorted CSV ===
sorted_file = "July2024_Sorted.csv"
df.to_csv(sorted_file, index=False)
print("✅ Data sorted successfully by date and saved as 'July2024_Sorted.csv'")
✅ Data sorted successfully by date and saved as 'July2024_Sorted.csv'
!pip install textblob
Collecting textblob Downloading textblob-0.19.0-py3-none-any.whl.metadata (4.4 kB) Requirement already satisfied: nltk>=3.9 in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from textblob) (3.9.1) Requirement already satisfied: click in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from nltk>=3.9->textblob) (8.1.8) Requirement already satisfied: joblib in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from nltk>=3.9->textblob) (1.4.2) Requirement already satisfied: regex>=2021.8.3 in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from nltk>=3.9->textblob) (2024.11.6) Requirement already satisfied: tqdm in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from nltk>=3.9->textblob) (4.67.1) Requirement already satisfied: colorama in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from click->nltk>=3.9->textblob) (0.4.6) Downloading textblob-0.19.0-py3-none-any.whl (624 kB) ---------------------------------------- 0.0/624.3 kB ? eta -:--:-- ---------------------------------------- 0.0/624.3 kB ? eta -:--:-- ---------------------------------------- 0.0/624.3 kB ? eta -:--:-- ---------------------------------------- 0.0/624.3 kB ? eta -:--:-- ---------------------------------------- 0.0/624.3 kB ? eta -:--:-- ---------------------------------------- 0.0/624.3 kB ? eta -:--:-- ---------------- ----------------------- 262.1/624.3 kB ? eta -:--:-- ---------------- ----------------------- 262.1/624.3 kB ? eta -:--:-- ------------------------------- ------ 524.3/624.3 kB 656.5 kB/s eta 0:00:01 -------------------------------------- 624.3/624.3 kB 584.6 kB/s eta 0:00:00 Installing collected packages: textblob Successfully installed textblob-0.19.0
import nltk
nltk.download('punkt')
[nltk_data] Downloading package punkt to [nltk_data] C:\Users\hp\AppData\Roaming\nltk_data... [nltk_data] Package punkt is already up-to-date!
True
print(df.columns.tolist())
['Package_Name', 'App Version Code', 'App Version Name', 'Reviewer Language', 'Device', 'Review Submit Date and Time', 'Review Submit Millis Since Epoch', 'Review Last Update Date and Time', 'Review Last Update Millis Since Epoch', 'Star Rating', 'Review Title', 'Review Text', 'Developer Reply Date and Time', 'Developer Reply Millis Since Epoch', 'Developer Reply Text', 'Review Link', 'Edit_Gap_Days']
df['Rating'] = pd.to_numeric(df['Star Rating'], errors ='coerce')
df.rename(columns={'Star Rating': 'Rating'}, inplace=True)
df['Rating'] = pd.to_numeric(df['Rating'], errors='coerce')
df = df[df['Rating'].between(1, 5)]
df.rename(columns={'Star Rating': 'Rating'}, inplace=True)
df.head()
| Package_Name | App Version Code | App Version Name | Reviewer Language | Device | Review Submit Date and Time | Review Submit Millis Since Epoch | Review Last Update Date and Time | Review Last Update Millis Since Epoch | Star Rating | Review Title | Review Text | Developer Reply Date and Time | Developer Reply Millis Since Epoch | Developer Reply Text | Review Link | Edit_Gap_Days | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | com.app.grow.greener.plants | 4.0 | 1.3 | ru | RE588E | 2024-07-02 12:46:52+00:00 | 1.719924e+12 | 2024-07-02 12:46:52+00:00 | 1.719924e+12 | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | 0 |
| 1 | com.app.grow.greener.plants | NaN | NaN | ru | RMX1993L1 | 2024-07-02 14:45:11+00:00 | 1.719932e+12 | 2024-07-02 14:45:11+00:00 | 1.719932e+12 | 1.0 | NaN | 5 @01>B05B. | NaN | NaN | NaN | http://play.google.com/console/developers/5478... | 0 |
| 2 | com.app.grow.greener.plants | 13.0 | 2.2 | ru | TECNO-LH6n | 2024-07-04 05:01:01+00:00 | 1.720069e+12 | 2024-07-20 19:32:56+00:00 | 1.721504e+12 | 5.0 | NaN | @8;>65=85 AC?5@ 8=B5@D59A @04C5B 8 70@01>B>: B... | NaN | NaN | NaN | http://play.google.com/console/developers/5478... | 16 |
| 3 | com.app.grow.greener.plants | 4.0 | 1.3 | es | a04e | 2024-07-05 18:21:35+00:00 | 1.720204e+12 | 2024-07-05 18:21:35+00:00 | 1.720204e+12 | 5.0 | NaN | NaN | NaN | NaN | NaN | NaN | 0 |
| 4 | com.app.grow.greener.plants | NaN | NaN | pt | mdh15lm | 2024-07-08 23:59:48+00:00 | 1.720483e+12 | 2024-07-08 23:59:48+00:00 | 1.720483e+12 | 5.0 | NaN | NaN | NaN | NaN | NaN | NaN | 0 |
import pandas as pd
import os
# Path to your CSV or Excel file
file_path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\cleaned_Merged_Output.csv"
# Read file (auto-detect format)
if file_path.endswith('.csv'):
df = pd.read_csv(file_path)
else:
df = pd.read_excel(file_path, engine='openpyxl')
# ✅ STEP 1: Clean column names (remove spaces and normalize for matching)
df.columns = df.columns.str.strip() # Remove spaces around names
# ✅ STEP 2: Check for 'Rating' or 'Star Rating' column
rating_col = None
for col in df.columns:
if col.lower() in ['rating', 'star rating', 'star_rating']:
rating_col = col
break
if rating_col is None:
raise KeyError("No column found matching 'Rating' or 'Star Rating'. Please verify your dataset.")
print(f"Using column: {rating_col}")
# ✅ STEP 3: Clean and filter ratings
df[rating_col] = pd.to_numeric(df[rating_col], errors='coerce') # Convert to numeric
df = df[df[rating_col].between(1, 5)] # Keep only valid ratings 1–5
# ✅ STEP 4: Fill NaNs for other columns if needed
df = df.fillna('')
# ✅ STEP 5: Export cleaned file
output_path = os.path.splitext(file_path)[0] + "_cleaned.csv"
df.to_csv(output_path, index=False)
print(f"\n✅ Cleaned file saved successfully as: {output_path}")
Using column: Star Rating ✅ Cleaned file saved successfully as: C:\Users\hp\Desktop\WEEK 3_DSHub\cleaned_Merged_Output_cleaned.csv
#SENTIMENT ANALYSIS
# ==============================
# STEP 1: Import Dependencies
# ==============================
import pandas as pd
import matplotlib.pyplot as plt
from textblob import TextBlob
# ==============================
# STEP 2: Load Your CSV File
# ==============================
file_path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\cleaned_Merged_Output_cleaned.csv" # <-- update path if needed
df = pd.read_csv(file_path, encoding='utf-8', low_memory=False)
print("✅ Data loaded successfully!")
print("Shape:", df.shape)
print("\nColumns:", list(df.columns))
print("\nPreview:\n", df.head())
# ==============================
# STEP 3: Rename Columns (Standardize Names)
# ==============================
df.rename(columns={
'Star Rating': 'Rating',
'Review Submit Date and Time': 'Submit_Date',
'Review Last Update Date and Time': 'Last_Update_Date'
}, inplace=True)
# ==============================
# STEP 4: Clean & Convert Date Columns
# ==============================
# Convert messy date strings into proper datetime format
for col in ['Submit_Date', 'Last_Update_Date']:
df[col] = pd.to_datetime(df[col], errors='coerce')
# Drop rows where both dates are missing
df = df.dropna(subset=['Submit_Date', 'Last_Update_Date'], how='all')
# ==============================
# STEP 5: Calculate Time Difference (Editing Gap)
# ==============================
df['Edit_Gap_Days'] = (df['Last_Update_Date'] - df['Submit_Date']).dt.days
df['Edit_Gap_Days'] = df['Edit_Gap_Days'].fillna(0)
# ==============================
# STEP 6: Clean & Normalize Rating Column
# ==============================
df['Rating'] = pd.to_numeric(df['Rating'], errors='coerce')
df = df[df['Rating'].between(1, 5)] # Keep only valid ratings (1–5)
# ==============================
# STEP 7: Sentiment Analysis (Optional)
# ==============================
if 'Review Text' in df.columns:
df['Sentiment'] = df['Review Text'].astype(str).apply(lambda x: TextBlob(x).sentiment.polarity)
else:
print("⚠️ 'Review Text' column not found — skipping sentiment analysis.")
# ==============================
# STEP 8: Basic Descriptive Statistics
# ==============================
print("\n===== Rating Summary =====")
print(df)
✅ Data loaded successfully!
Shape: (995, 16)
Columns: ['Package_Name', 'App Version Code', 'App Version Name', 'Reviewer Language', 'Device', 'Review Submit Date and Time', 'Review Submit Millis Since Epoch', 'Review Last Update Date and Time', 'Review Last Update Millis Since Epoch', 'Star Rating', 'Review Title', 'Review Text', 'Developer Reply Date and Time', 'Developer Reply Millis Since Epoch', 'Developer Reply Text', 'Review Link']
Preview:
Package_Name App Version Code App Version Name \
0 com.app.grow.greener.plants 4.0 1.3
1 com.app.grow.greener.plants NaN NaN
2 com.app.grow.greener.plants 4.0 1.3
3 com.app.grow.greener.plants NaN NaN
4 com.app.grow.greener.plants 13.0 2.2
Reviewer Language Device Review Submit Date and Time \
0 ru RE588E 2024-07-02T12:46:52Z
1 ru RMX1993L1 2024-07-02T14:45:11Z
2 es a04e 2024-07-05T18:21:35Z
3 pt mdh15lm 2024-07-08T23:59:48Z
4 pt a10 2024-07-15T15:58:01Z
Review Submit Millis Since Epoch Review Last Update Date and Time \
0 1.719924e+12 2024-07-02T12:46:52Z
1 1.719932e+12 2024-07-02T14:45:11Z
2 1.720204e+12 2024-07-05T18:21:35Z
3 1.720483e+12 2024-07-08T23:59:48Z
4 1.721059e+12 2024-07-15T15:58:09Z
Review Last Update Millis Since Epoch Star Rating Review Title \
0 1.719924e+12 1.0 NaN
1 1.719932e+12 1.0 NaN
2 1.720204e+12 5.0 NaN
3 1.720483e+12 5.0 NaN
4 1.721059e+12 5.0 NaN
Review Text Developer Reply Date and Time \
0 NaN NaN
1 5 @01>B05B. NaN
2 NaN NaN
3 NaN NaN
4 Ótimo aplicativo NaN
Developer Reply Millis Since Epoch Developer Reply Text \
0 NaN NaN
1 NaN NaN
2 NaN NaN
3 NaN NaN
4 NaN NaN
Review Link
0 NaN
1 http://play.google.com/console/developers/5478...
2 NaN
3 NaN
4 http://play.google.com/console/developers/5478...
===== Rating Summary =====
Package_Name App Version Code App Version Name \
0 com.app.grow.greener.plants 4.0 1.3
1 com.app.grow.greener.plants NaN NaN
2 com.app.grow.greener.plants 4.0 1.3
3 com.app.grow.greener.plants NaN NaN
4 com.app.grow.greener.plants 13.0 2.2
.. ... ... ...
990 com.app.grow.greener.plants 13.0 2.2
991 com.app.grow.greener.plants 13.0 2.2
992 com.app.grow.greener.plants 13.0 2.2
993 com.app.grow.greener.plants 13.0 2.2
994 com.app.grow.greener.plants 13.0 2.2
Reviewer Language Device Submit_Date \
0 ru RE588E 2024-07-02 12:46:52+00:00
1 ru RMX1993L1 2024-07-02 14:45:11+00:00
2 es a04e 2024-07-05 18:21:35+00:00
3 pt mdh15lm 2024-07-08 23:59:48+00:00
4 pt a10 2024-07-15 15:58:01+00:00
.. ... ... ...
990 bg a50 2024-07-21 13:15:33+00:00
991 id star2lte 2024-07-21 13:16:19+00:00
992 ru Infinix-X6516 2024-07-21 13:21:56+00:00
993 id cloud 2024-07-21 13:26:27+00:00
994 hu vicky 2024-07-21 13:29:41+00:00
Review Submit Millis Since Epoch Last_Update_Date \
0 1.719924e+12 2024-07-02 12:46:52+00:00
1 1.719932e+12 2024-07-02 14:45:11+00:00
2 1.720204e+12 2024-07-05 18:21:35+00:00
3 1.720483e+12 2024-07-08 23:59:48+00:00
4 1.721059e+12 2024-07-15 15:58:09+00:00
.. ... ...
990 1.721568e+12 2024-07-21 13:15:33+00:00
991 1.721568e+12 2024-07-21 13:16:19+00:00
992 1.721568e+12 2024-07-21 13:21:56+00:00
993 1.721568e+12 2024-07-21 13:26:27+00:00
994 1.721569e+12 2024-07-21 13:29:41+00:00
Review Last Update Millis Since Epoch Rating Review Title \
0 1.719924e+12 1.0 NaN
1 1.719932e+12 1.0 NaN
2 1.720204e+12 5.0 NaN
3 1.720483e+12 5.0 NaN
4 1.721059e+12 5.0 NaN
.. ... ... ...
990 1.721568e+12 5.0 NaN
991 1.721568e+12 5.0 NaN
992 1.721568e+12 5.0 NaN
993 1.721568e+12 5.0 NaN
994 1.721569e+12 3.0 NaN
Review Text \
0 NaN
1 5 @01>B05B.
2 NaN
3 NaN
4 Ótimo aplicativo
.. ...
990 NaN
991 Semoga terbuki membayar dan sukses terus game nya
992 NaN
993 Saya coba duu
994 Még csak ma kezdtem. Meglátom hogy ér-e öt csi...
Developer Reply Date and Time Developer Reply Millis Since Epoch \
0 NaN NaN
1 NaN NaN
2 NaN NaN
3 NaN NaN
4 NaN NaN
.. ... ...
990 NaN NaN
991 NaN NaN
992 NaN NaN
993 NaN NaN
994 NaN NaN
Developer Reply Text Review Link \
0 NaN NaN
1 NaN http://play.google.com/console/developers/5478...
2 NaN NaN
3 NaN NaN
4 NaN http://play.google.com/console/developers/5478...
.. ... ...
990 NaN NaN
991 NaN http://play.google.com/console/developers/5478...
992 NaN NaN
993 NaN http://play.google.com/console/developers/5478...
994 NaN http://play.google.com/console/developers/5478...
Edit_Gap_Days Sentiment
0 0 0.0
1 0 0.0
2 0 0.0
3 0 0.0
4 0 0.0
.. ... ...
990 0 0.0
991 0 -0.4
992 0 0.0
993 0 0.0
994 0 0.0
[995 rows x 18 columns]
import pandas as pd
# ==============================
# STEP 1: Load the existing cleaned file
# ==============================
file_path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\cleaned_Merged_Output_cleaned.csv"
df = pd.read_csv(file_path, encoding='utf-8', low_memory=False)
print("✅ File loaded successfully.")
print("Before cleaning:", df.shape)
# ==============================
# STEP 2: Drop unwanted columns
# ==============================
cols_to_drop = [
"Review Last Update Millis Since Epoch",
"Developer Reply Date and Time",
"Developer Reply Millis Since Epoch",
"Review Title",
"Developer Reply Text"
]
# Drop columns if they exist
df = df.drop(columns=[c for c in cols_to_drop if c in df.columns], errors='ignore')
# ==============================
# STEP 3: Remove rows with missing key values
# ==============================
required_cols = ["Star Rating", "Review Submit Date and Time", "Review Last Update Date and Time"]
# Drop rows that have NaN in any of these important columns
df = df.dropna(subset=required_cols, how='any')
print("After cleaning:", df.shape)
# ==============================
# STEP 4: Export the cleaned dataset
# ==============================
output_path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\Final_Review_Analysis_Cleaned.csv"
df.to_csv(output_path, index=False, encoding='utf-8')
print(f"✅ Cleaning complete! File saved at:\n{output_path}")
✅ File loaded successfully. Before cleaning: (995, 16) After cleaning: (995, 11) ✅ Cleaning complete! File saved at: C:\Users\hp\Desktop\WEEK 3_DSHub\Final_Review_Analysis_Cleaned.csv
print(df.columns.tolist())
['Package_Name', 'App Version Code', 'App Version Name', 'Reviewer Language', 'Device', 'Review Submit Date and Time', 'Review Submit Millis Since Epoch', 'Review Last Update Date and Time', 'Star Rating', 'Review Text', 'Review Link']
df.head()
| Package_Name | App Version Code | App Version Name | Reviewer Language | Device | Review Submit Date and Time | Review Submit Millis Since Epoch | Review Last Update Date and Time | Star Rating | Review Text | Review Link | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | com.app.grow.greener.plants | 4.0 | 1.3 | ru | RE588E | 2024-07-02T12:46:52Z | 1.719924e+12 | 2024-07-02T12:46:52Z | 1.0 | NaN | NaN |
| 1 | com.app.grow.greener.plants | NaN | NaN | ru | RMX1993L1 | 2024-07-02T14:45:11Z | 1.719932e+12 | 2024-07-02T14:45:11Z | 1.0 | 5 @01>B05B. | http://play.google.com/console/developers/5478... |
| 2 | com.app.grow.greener.plants | 4.0 | 1.3 | es | a04e | 2024-07-05T18:21:35Z | 1.720204e+12 | 2024-07-05T18:21:35Z | 5.0 | NaN | NaN |
| 3 | com.app.grow.greener.plants | NaN | NaN | pt | mdh15lm | 2024-07-08T23:59:48Z | 1.720483e+12 | 2024-07-08T23:59:48Z | 5.0 | NaN | NaN |
| 4 | com.app.grow.greener.plants | 13.0 | 2.2 | pt | a10 | 2024-07-15T15:58:01Z | 1.721059e+12 | 2024-07-15T15:58:09Z | 5.0 | Ótimo aplicativo | http://play.google.com/console/developers/5478... |
# ==============================
# STEP 1: Import Dependencies
# ==============================
import pandas as pd
import matplotlib.pyplot as plt
from textblob import TextBlob
# ==============================
# STEP 2: Load Your CSV File
# ==============================
file_path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\cleaned_Merged_Output_cleaned.csv" # update if needed
df = pd.read_csv(file_path, encoding='utf-8', low_memory=False)
print("✅ Data loaded successfully!")
print("Shape:", df.shape)
print("\nColumns:", list(df.columns))
print("\nPreview:\n", df.head())
# ==============================
# STEP 4: Clean & Convert Date Columns
# ==============================
for col in ['Review Submit Date and Time', 'Review Last Update Date and Time']:
df[col] = pd.to_datetime(df[col], errors='coerce')
# Drop rows where both dates are missing
df = df.dropna(subset=['Review Submit Date and Time', 'Review Last Update Date and Time'], how='all')
# ==============================
# STEP 5: Calculate Time Difference (Editing Gap)
# ==============================
df['Edit_Gap_Days'] = (df['Review Last Update Date and Time'] - df['Review Submit Date and Time']).dt.days
df['Edit_Gap_Days'] = df['Edit_Gap_Days'].fillna(0)
# ==============================
# STEP 6: Clean & Normalize Rating Column
# ==============================
df['Star Rating'] = pd.to_numeric(df['Star Rating'], errors='coerce')
df = df[df['Star Rating'].between(1, 5)]
# ==============================
# STEP 8: Basic Descriptive Statistics
# ==============================
print("\n===== Rating Summary =====")
print(df['Star Rating'].describe())
print("\n===== Edit Gap Summary =====")
print(df['Edit_Gap_Days'].describe())
# ==============================
# STEP 9: Visualize Key Insights
# ==============================
# --- Ratings Distribution ---
plt.figure(figsize=(7,4))
df['Star Rating'].value_counts().sort_index().plot(kind='bar', color='skyblue')
plt.title('Distribution of Ratings')
plt.xlabel('Rating')
plt.ylabel('Count')
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()
# --- Average Edit Gap by Rating ---
plt.figure(figsize=(7,4))
df.groupby('Star Rating')['Edit_Gap_Days'].mean().plot(kind='bar', color='orange')
plt.title('Average Edit Gap by Rating')
plt.xlabel('Star Rating')
plt.ylabel('Average Edit Gap (Days)')
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()
# ==============================
# STEP 10: Export Cleaned Dataset
# ==============================
output_path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\Final_Review_Analysis.csv"
df.to_csv(output_path, index=False, encoding='utf-8')
print(f"\n✅ Cleaned and analyzed data saved successfully at:\n{output_path}")
✅ Data loaded successfully!
Shape: (995, 16)
Columns: ['Package_Name', 'App Version Code', 'App Version Name', 'Reviewer Language', 'Device', 'Review Submit Date and Time', 'Review Submit Millis Since Epoch', 'Review Last Update Date and Time', 'Review Last Update Millis Since Epoch', 'Star Rating', 'Review Title', 'Review Text', 'Developer Reply Date and Time', 'Developer Reply Millis Since Epoch', 'Developer Reply Text', 'Review Link']
Preview:
Package_Name App Version Code App Version Name \
0 com.app.grow.greener.plants 4.0 1.3
1 com.app.grow.greener.plants NaN NaN
2 com.app.grow.greener.plants 4.0 1.3
3 com.app.grow.greener.plants NaN NaN
4 com.app.grow.greener.plants 13.0 2.2
Reviewer Language Device Review Submit Date and Time \
0 ru RE588E 2024-07-02T12:46:52Z
1 ru RMX1993L1 2024-07-02T14:45:11Z
2 es a04e 2024-07-05T18:21:35Z
3 pt mdh15lm 2024-07-08T23:59:48Z
4 pt a10 2024-07-15T15:58:01Z
Review Submit Millis Since Epoch Review Last Update Date and Time \
0 1.719924e+12 2024-07-02T12:46:52Z
1 1.719932e+12 2024-07-02T14:45:11Z
2 1.720204e+12 2024-07-05T18:21:35Z
3 1.720483e+12 2024-07-08T23:59:48Z
4 1.721059e+12 2024-07-15T15:58:09Z
Review Last Update Millis Since Epoch Star Rating Review Title \
0 1.719924e+12 1.0 NaN
1 1.719932e+12 1.0 NaN
2 1.720204e+12 5.0 NaN
3 1.720483e+12 5.0 NaN
4 1.721059e+12 5.0 NaN
Review Text Developer Reply Date and Time \
0 NaN NaN
1 5 @01>B05B. NaN
2 NaN NaN
3 NaN NaN
4 Ótimo aplicativo NaN
Developer Reply Millis Since Epoch Developer Reply Text \
0 NaN NaN
1 NaN NaN
2 NaN NaN
3 NaN NaN
4 NaN NaN
Review Link
0 NaN
1 http://play.google.com/console/developers/5478...
2 NaN
3 NaN
4 http://play.google.com/console/developers/5478...
===== Rating Summary =====
count 995.000000
mean 4.558794
std 0.937327
min 1.000000
25% 5.000000
50% 5.000000
75% 5.000000
max 5.000000
Name: Star Rating, dtype: float64
===== Edit Gap Summary =====
count 995.000000
mean 0.019095
std 0.515979
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 16.000000
Name: Edit_Gap_Days, dtype: float64
✅ Cleaned and analyzed data saved successfully at: C:\Users\hp\Desktop\WEEK 3_DSHub\Final_Review_Analysis.csv
df.head()
| Package_Name | App Version Code | App Version Name | Reviewer Language | Device | Review Submit Date and Time | Review Submit Millis Since Epoch | Review Last Update Date and Time | Review Last Update Millis Since Epoch | Star Rating | Review Title | Review Text | Developer Reply Date and Time | Developer Reply Millis Since Epoch | Developer Reply Text | Review Link | Edit_Gap_Days | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | com.app.grow.greener.plants | 4.0 | 1.3 | ru | RE588E | 2024-07-02 12:46:52+00:00 | 1.719924e+12 | 2024-07-02 12:46:52+00:00 | 1.719924e+12 | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | 0 |
| 1 | com.app.grow.greener.plants | NaN | NaN | ru | RMX1993L1 | 2024-07-02 14:45:11+00:00 | 1.719932e+12 | 2024-07-02 14:45:11+00:00 | 1.719932e+12 | 1.0 | NaN | 5 @01>B05B. | NaN | NaN | NaN | http://play.google.com/console/developers/5478... | 0 |
| 2 | com.app.grow.greener.plants | 4.0 | 1.3 | es | a04e | 2024-07-05 18:21:35+00:00 | 1.720204e+12 | 2024-07-05 18:21:35+00:00 | 1.720204e+12 | 5.0 | NaN | NaN | NaN | NaN | NaN | NaN | 0 |
| 3 | com.app.grow.greener.plants | NaN | NaN | pt | mdh15lm | 2024-07-08 23:59:48+00:00 | 1.720483e+12 | 2024-07-08 23:59:48+00:00 | 1.720483e+12 | 5.0 | NaN | NaN | NaN | NaN | NaN | NaN | 0 |
| 4 | com.app.grow.greener.plants | 13.0 | 2.2 | pt | a10 | 2024-07-15 15:58:01+00:00 | 1.721059e+12 | 2024-07-15 15:58:09+00:00 | 1.721059e+12 | 5.0 | NaN | Ótimo aplicativo | NaN | NaN | NaN | http://play.google.com/console/developers/5478... | 0 |
User Retention & Chun
#returning VS one-time users
user_activity = df.groupby('Device').size().reset_index(name='Review_Count')
user_activity
| Device | Review_Count | |
|---|---|---|
| 0 | 1807 | 1 |
| 1 | 1808 | 1 |
| 2 | 1816 | 1 |
| 3 | 1820 | 2 |
| 4 | 1901 | 1 |
| ... | ... | ... |
| 470 | vince | 2 |
| 471 | viva | 1 |
| 472 | water | 1 |
| 473 | willow | 2 |
| 474 | zircon | 1 |
475 rows × 2 columns
Users with Review_Count > 1 → retained users
Users with Review_Count == 1 → churned users (one-time reviewers)
#Count and Tabulate Retained vs One-Time User
import pandas as pd
# Load your cleaned CSV
file_path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\Final_Review_Analysis_Cleaned.csv"
df = pd.read_csv(file_path, encoding='utf-8', low_memory=False)
# Ensure the key column exists
if 'Device' not in df.columns:
raise ValueError("❌ 'Device' column not found in dataset!")
# Count number of reviews per device (user)
user_activity = df.groupby('Device').size().reset_index(name='Review_Count')
# Classify users
user_activity['User_Type'] = user_activity['Review_Count'].apply(
lambda x: 'Retained (Reviewed > 1 time)' if x > 1 else 'One-time (Churned)'
)
# Tabulate summary
summary = user_activity['User_Type'].value_counts().reset_index()
summary.columns = ['User_Type', 'User_Count']
# Add retention rate
total_users = summary['User_Count'].sum()
summary['Percentage'] = (summary['User_Count'] / total_users * 100).round(2)
print("===== USER RETENTION & CHURN SUMMARY =====")
print(summary)
# Optional: Export to CSV
output_path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\User_Retention_Summary.csv"
summary.to_csv(output_path, index=False)
print(f"\n✅ Summary saved to: {output_path}")
===== USER RETENTION & CHURN SUMMARY =====
User_Type User_Count Percentage
0 One-time (Churned) 263 55.37
1 Retained (Reviewed > 1 time) 212 44.63
✅ Summary saved to: C:\Users\hp\Desktop\WEEK 3_DSHub\User_Retention_Summary.csv
import pandas as pd
import matplotlib.pyplot as plt
# ==============================
# STEP 1: Load Your Cleaned CSV
# ==============================
file_path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\Final_Review_Analysis_Cleaned.csv"
df = pd.read_csv(file_path, encoding='utf-8', low_memory=False)
# ==============================
# STEP 2: Check for 'Device' Column
# ==============================
if 'Device' not in df.columns:
raise ValueError("❌ 'Device' column not found in dataset!")
# ==============================
# STEP 3: Count Reviews per User (Device)
# ==============================
user_activity = df.groupby('Device').size().reset_index(name='Review_Count')
# ==============================
# STEP 4: Classify Users
# ==============================
user_activity['User_Type'] = user_activity['Review_Count'].apply(
lambda x: 'Retained (Reviewed > 1 time)' if x > 1 else 'One-time (Churned)'
)
# ==============================
# STEP 5: Tabulate Summary
# ==============================
summary = user_activity['User_Type'].value_counts().reset_index()
summary.columns = ['User_Type', 'User_Count']
# Add retention percentage
total_users = summary['User_Count'].sum()
summary['Percentage'] = (summary['User_Count'] / total_users * 100).round(2)
print("===== USER RETENTION & CHURN SUMMARY =====")
print(summary)
# ==============================
# STEP 6: Visualization
# ==============================
# --- Bar Chart ---
plt.figure(figsize=(7,4))
plt.bar(summary['User_Type'], summary['User_Count'], color=['green', 'red'])
plt.title('User Retention vs Churn')
plt.xlabel('User Type')
plt.ylabel('User Count')
plt.grid(axis='y', linestyle='--', alpha=0.7)
for i, v in enumerate(summary['User_Count']):
plt.text(i, v + (v * 0.01), str(v), ha='center', fontweight='bold')
plt.show()
# --- Pie Chart ---
plt.figure(figsize=(5,5))
plt.pie(summary['User_Count'], labels=summary['User_Type'], autopct='%1.1f%%',
colors=['green', 'red'], startangle=90, explode=(0.05, 0))
plt.title('User Retention vs Churn (Percentage)')
plt.show()
# ==============================
# STEP 7: Export Summary
# ==============================
output_path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\User_Retention_Summary.csv"
summary.to_csv(output_path, index=False)
print(f"\n✅ Summary and charts generated successfully! File saved to:\n{output_path}")
===== USER RETENTION & CHURN SUMMARY =====
User_Type User_Count Percentage
0 One-time (Churned) 263 55.37
1 Retained (Reviewed > 1 time) 212 44.63
✅ Summary and charts generated successfully! File saved to: C:\Users\hp\Desktop\WEEK 3_DSHub\User_Retention_Summary.csv
# Save as PNG
plt.savefig("sample_plot.png", dpi=300, bbox_inches='tight')
# Save as PDF
plt.savefig("sample_plot.pdf", bbox_inches='tight')
<Figure size 640x480 with 0 Axes>
#Install & Install Trends
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style="whitegrid") # prettier plots
# Replace paths with your actual CSV file locations
csv_files = [
r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\install\install_july.csv",
r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\install_Aug.csv",
r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\install_sept.csv",
r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\install_oct.csv",
r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\install_nov.csv",
r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\install_dec.csv"
]
cols_to_keep = ['Date', 'Package Name', 'Daily Device Installs', 'Daily Device Uninstalls', 'Daily Device Upgrades', 'Active Device Installs']
import pandas as pd
import glob
# Folder containing your monthly CSVs (July–December)
path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\install"
# Collect all CSVs in that folder
files = glob.glob(path + "/*.csv")
# Target columns to keep
cols_to_keep = [
"Date", "Package Name", "Daily Device Installs",
"Daily Device Uninstalls", "Total User Installs"
]
merged_data = []
for file in files:
try:
temp = pd.read_csv(file, encoding='utf-8', low_memory=False)
except UnicodeDecodeError:
temp = pd.read_csv(file, encoding='latin1', low_memory=False)
# Normalize column names
temp.columns = temp.columns.str.strip().str.lower()
# Map possible column variations
rename_map = {
'package name': 'Package Name',
'package_name': 'Package Name',
'date': 'Date',
'daily device installs': 'Daily Device Installs',
'daily_device_installs': 'Daily Device Installs',
'daily device uninstalls': 'Daily Device Uninstalls',
'daily_device_uninstalls': 'Daily Device Uninstalls',
'total user installs': 'Total User Installs',
'total_user_installs': 'Total User Installs'
}
temp.rename(columns=rename_map, inplace=True)
# Keep only required columns
available_cols = [col for col in cols_to_keep if col in temp.columns]
temp = temp[available_cols]
# Add a column to track which month file this came from
temp['Source_File'] = file.split("\\")[-1]
merged_data.append(temp)
# Merge all months
df = pd.concat(merged_data, ignore_index=True)
# Clean date column
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
# Sort by date
df = df.sort_values(by='Date')
print("✅ Data merged successfully!")
print("Shape:", df.shape)
print("\nColumns:", df.columns.tolist())
print("\nMonths included:")
print(df['Source_File'].unique())
# Save cleaned merged file
output_path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged_Install_Uninstall_Data.csv"
df.to_csv(output_path, index=False, encoding='utf-8')
print(f"\n✅ Cleaned and merged data saved to:\n{output_path}")
✅ Data merged successfully! Shape: (8776, 6) Columns: ['Date', 'Package Name', 'Daily Device Installs', 'Daily Device Uninstalls', 'Total User Installs', 'Source_File'] Months included: ['install_Aug.csv' 'install_july.csv' 'install_sept.csv' 'install_oct.csv' 'install_nov.csv' 'install_dec.csv'] ✅ Cleaned and merged data saved to: C:\Users\hp\Desktop\WEEK 3_DSHub\Merged_Install_Uninstall_Data.csv
import pandas as pd
import glob
folder_path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\install.csv"
files = glob.glob(folder_path + "/*.csv")
for file in files:
df = pd.read_csv(file, encoding='utf-8', low_memory=False)
print(f"\n📂 {file}")
print(df.columns.tolist())
#verify all months are included
# Extract unique months and their record counts
merged_df["Month"] = merged_df["Date"].dt.month_name()
month_summary = merged_df["Month"].value_counts().sort_index()
print("\n===== MONTHLY DATA COVERAGE =====")
print(month_summary)
print("\n📆 Unique months found:", merged_df["Month"].unique())
===== MONTHLY DATA COVERAGE ===== Month August 124 December 486 July 2482 November 30 October 31 Name: count, dtype: int64 📆 Unique months found: ['August' 'December' 'July' 'November' 'October']
import os
folder_path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\install" # adjust this
files = [f for f in os.listdir(folder_path) if f.endswith(".csv")]
print("All CSV files found:\n", files)
All CSV files found: ['install_Aug.csv', 'install_dec.csv', 'install_july.csv', 'install_nov.csv', 'install_oct.csv', 'install_sept.csv']
pd.read_csv(r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged_Monthly_Data_Cleaned.csv", encoding='utf-8', low_memory=False)
| Date | Package name | Carrier | Daily Device Installs | Daily Device Uninstalls | Daily Device Upgrades | Total User Installs | Daily User Installs | Daily User Uninstalls | Active Device Installs | ... | Uninstall events | Month | Android OS Version | Country | Package Name | Daily Crashes | Daily ANRs | App Version Code | Source_File | Country_Full | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 7/2/2024 | com.app.grow.greener.plants | NaN | 6.0 | 0.0 | 0.0 | 0.0 | 4.0 | 3.0 | 3.0 | ... | 4.0 | Install_july | NaN | BG | NaN | NaN | NaN | NaN | NaN | Bulgaria |
| 1 | 7/2/2024 | com.app.grow.greener.plants | NaN | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | ... | 0.0 | Install_july | NaN | BR | NaN | NaN | NaN | NaN | NaN | Brazil |
| 2 | 7/2/2024 | com.app.grow.greener.plants | NaN | 4.0 | 0.0 | 0.0 | 0.0 | 3.0 | 9.0 | 2.0 | ... | 9.0 | Install_july | NaN | DE | NaN | NaN | NaN | NaN | NaN | Germany |
| 3 | 7/2/2024 | com.app.grow.greener.plants | NaN | 2.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | ... | 0.0 | Install_july | NaN | HU | NaN | NaN | NaN | NaN | NaN | Hungary |
| 4 | 7/2/2024 | com.app.grow.greener.plants | NaN | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | ... | 0.0 | Install_july | NaN | MY | NaN | NaN | NaN | NaN | NaN | Malaysia |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2470 | 7/31/2024 | com.app.grow.greener.plants | NaN | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | ... | 0.0 | Install_july | NaN | XK | NaN | NaN | NaN | NaN | NaN | XK |
| 2471 | 7/31/2024 | com.app.grow.greener.plants | NaN | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | ... | 0.0 | Install_july | NaN | YE | NaN | NaN | NaN | NaN | NaN | Yemen |
| 2472 | 7/31/2024 | com.app.grow.greener.plants | NaN | 2.0 | 0.0 | 0.0 | 0.0 | 2.0 | 3.0 | 27.0 | ... | 4.0 | Install_july | NaN | ZA | NaN | NaN | NaN | NaN | NaN | South Africa |
| 2473 | 7/31/2024 | com.app.grow.greener.plants | NaN | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2.0 | ... | 0.0 | Install_july | NaN | ZM | NaN | NaN | NaN | NaN | NaN | Zambia |
| 2474 | 7/31/2024 | com.app.grow.greener.plants | NaN | 6.0 | 0.0 | 0.0 | 0.0 | 8.0 | 5.0 | 27.0 | ... | 5.0 | Install_july | NaN | ZZ | NaN | NaN | NaN | NaN | NaN | ZZ |
2475 rows × 22 columns
print("===== MONTHLY DATA COVERAGE =====")
print(merged_df['Month'].value_counts())
print("\n📆 Unique months found:", merged_df['Month'].unique())
===== MONTHLY DATA COVERAGE ===== Month Install_aug 5685 Install_july 2482 Install_dec 486 Install_sept 62 Install_oct 31 Install_nov 30 Name: count, dtype: int64 📆 Unique months found: ['Install_aug' 'Install_dec' 'Install_july' 'Install_nov' 'Install_oct' 'Install_sept']
import pandas as pd
import glob
path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\install"
files = glob.glob(path + "/*.csv")
merged_data = []
for file in files:
df = pd.read_csv(file, encoding='latin1', low_memory=False)
# Optional: Extract month from filename
month_name = os.path.basename(file).split('.')[0] # Example: "September"
df['Month'] = month_name.capitalize()
merged_data.append(df)
# Combine all
merged_df = pd.concat(merged_data, ignore_index=True)
for file in files:
df = pd.read_csv(file, encoding='latin1', low_memory=False)
print(os.path.basename(file), "→", len(df), "rows")
install_Aug.csv → 5685 rows install_dec.csv → 486 rows install_july.csv → 2482 rows install_nov.csv → 30 rows install_oct.csv → 31 rows install_sept.csv → 62 rows
print("Total merged rows:", len(merged_df))
Total merged rows: 17552
import os
import pandas as pd
import glob
# 🔹 Set your folder path
path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\install" # change this to your actual folder
# 🔹 Find all CSV files in the folder
files = glob.glob(os.path.join(path, "*.csv"))
# 🔹 Print all detected files with row counts
print("===== FILES INCLUDED IN MERGE =====")
for file in files:
try:
temp = pd.read_csv(file, encoding='latin1', low_memory=False)
print(f"{os.path.basename(file):<30} | Rows: {len(temp)}")
except Exception as e:
print(f"{os.path.basename(file):<30} | ⚠️ Error reading file: {e}")
# 🔹 Check the unique months in the final merged dataframe (if you already merged)
print("\n===== MONTHS PRESENT IN MERGED DATA =====")
print(merged_df['Month'].value_counts())
print("\n📆 Unique months found:", merged_df['Month'].unique())
===== FILES INCLUDED IN MERGE ===== install_Aug.csv | Rows: 5685 install_dec.csv | Rows: 486 install_july.csv | Rows: 2482 install_nov.csv | Rows: 30 install_oct.csv | Rows: 31 install_sept.csv | Rows: 62 ===== MONTHS PRESENT IN MERGED DATA ===== Month Install_aug 5685 Install_july 2482 Install_dec 486 Install_sept 62 Install_oct 31 Install_nov 30 Name: count, dtype: int64 📆 Unique months found: ['Install_aug' 'Install_dec' 'Install_july' 'Install_nov' 'Install_oct' 'Install_sept']
# 🔹 Set your output path (update to your folder)
output_path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged_Monthly_Data.csv"
# 🔹 Save the merged dataframe
merged_df.to_csv(output_path, index=False, encoding='utf-8')
print(f"✅ Merged CSV saved successfully at:\n{output_path}")
✅ Merged CSV saved successfully at: C:\Users\hp\Desktop\WEEK 3_DSHub\Merged_Monthly_Data.csv
Monthly Trend Analysis of Install& Uninstall
import pandas as pd
import matplotlib.pyplot as plt
# ==============================
# Step 1: Load & Clean Data
# ==============================
file_path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged_Monthly_Data_Cleaned.csv"
df = pd.read_csv(file_path, encoding='utf-8', low_memory=False)
# Convert 'Date' to datetime
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
# Keep only relevant columns
df = df[['Date', 'Daily Device Installs', 'Daily Device Uninstalls']]
# Remove NaN and duplicates
df.dropna(subset=['Date', 'Daily Device Installs', 'Daily Device Uninstalls'], inplace=True)
df.drop_duplicates(inplace=True)
# ==============================
# Step 2: Daily Trend Analysis
# ==============================
daily_summary = df.groupby('Date')[['Daily Device Installs', 'Daily Device Uninstalls']].sum().reset_index()
print("\n===== Daily Summary =====")
print(daily_summary.describe())
# Plot daily trends
plt.figure(figsize=(12,6))
plt.plot(daily_summary['Date'], daily_summary['Daily Device Installs'], label='Daily Installs', marker='o')
plt.plot(daily_summary['Date'], daily_summary['Daily Device Uninstalls'], label='Daily Uninstalls', marker='x')
plt.title('Daily Install & Uninstall Trends')
plt.xlabel('Date')
plt.ylabel('Number of Devices')
plt.xticks(rotation=45)
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()
===== Daily Summary =====
Date Daily Device Installs Daily Device Uninstalls
count 30 30.000000 30.0
mean 2024-07-16 12:00:00 1450.833333 0.0
min 2024-07-02 00:00:00 14.000000 0.0
25% 2024-07-09 06:00:00 25.500000 0.0
50% 2024-07-16 12:00:00 1173.000000 0.0
75% 2024-07-23 18:00:00 2644.750000 0.0
max 2024-07-31 00:00:00 4308.000000 0.0
std NaN 1522.512920 0.0
# Step 3: Weekly Trend Analysis
# ==============================
df['Year_Week'] = df['Date'].dt.to_period('W').apply(lambda r: r.start_time)
weekly_summary = df.groupby('Year_Week')[['Daily Device Installs', 'Daily Device Uninstalls']].sum().reset_index()
# Plot weekly trends
plt.figure(figsize=(12,6))
plt.plot(weekly_summary['Year_Week'], weekly_summary['Daily Device Installs'], label='Weekly Installs', marker='o')
plt.plot(weekly_summary['Year_Week'], weekly_summary['Daily Device Uninstalls'], label='Weekly Uninstalls', marker='x')
plt.title('Weekly Install & Uninstall Trends')
plt.xlabel('Week Start Date')
plt.ylabel('Number of Devices')
plt.xticks(rotation=45)
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()
# ==============================
# Step 4: Monthly Trend Analysis (Calendar Month)
# ==============================
df['Year_Month'] = df['Date'].dt.to_period('M').astype(str)
monthly_summary = df.groupby('Year_Month')[['Daily Device Installs', 'Daily Device Uninstalls']].sum().reset_index()
# Plot monthly trends
plt.figure(figsize=(10,5))
plt.bar(monthly_summary['Year_Month'], monthly_summary['Daily Device Installs'], label='Monthly Installs', alpha=0.7)
plt.bar(monthly_summary['Year_Month'], monthly_summary['Daily Device Uninstalls'], label='Monthly Uninstalls', alpha=0.7)
plt.title('Monthly Install & Uninstall Trends')
plt.xlabel('Month')
plt.ylabel('Number of Devices')
plt.xticks(rotation=45)
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.legend()
plt.tight_layout()
plt.show()
# ==============================
# Step 5: Statistical Summary
# ==============================
print("\n===== Statistical Summary =====")
print("Daily Installs & Uninstalls:")
print(daily_summary[['Daily Device Installs', 'Daily Device Uninstalls']].describe())
print("\nWeekly Installs & Uninstalls:")
print(weekly_summary[['Daily Device Installs', 'Daily Device Uninstalls']].describe())
print("\nMonthly Installs & Uninstalls:")
print(monthly_summary[['Daily Device Installs', 'Daily Device Uninstalls']].describe())
===== Statistical Summary =====
Daily Installs & Uninstalls:
Daily Device Installs Daily Device Uninstalls
count 122.000000 122.0
mean 2047.639344 0.0
std 2176.634012 0.0
min 6.000000 0.0
25% 40.000000 0.0
50% 1451.000000 0.0
75% 2899.000000 0.0
max 8301.000000 0.0
Weekly Installs & Uninstalls:
Daily Device Installs Daily Device Uninstalls
count 19.000000 19.0
mean 13148.000000 0.0
std 13572.012771 0.0
min 55.000000 0.0
25% 1011.000000 0.0
50% 7789.000000 0.0
75% 18626.000000 0.0
max 41503.000000 0.0
Monthly Installs & Uninstalls:
Daily Device Installs Daily Device Uninstalls
count 4.000000 4.0
mean 62453.000000 0.0
std 41559.269291 0.0
min 13949.000000 0.0
25% 37709.000000 0.0
50% 63197.000000 0.0
75% 87941.000000 0.0
max 109469.000000 0.0
Country/region-based usage distribution
Prepare a mapping of country codes to full names
pip install pycountry
import pandas as pd
import pycountry
# ==============================
# STEP 1: Load your merged CSV
# ==============================
file_path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged_Monthly_Data.csv"
df = pd.read_csv(file_path, encoding='utf-8', low_memory=False)
# ==============================
# STEP 2: Remove rows with empty Country
# ==============================
df = df[df['Country'].notna() & (df['Country'].str.strip() != '')]
# ==============================
# STEP 3: Map country codes to full names
# ==============================
def code_to_country(code):
try:
return pycountry.countries.get(alpha_2=code.upper()).name
except:
return code # fallback if code not found
df['Country_Full'] = df['Country'].apply(code_to_country)
# ==============================
# STEP 4: Save updated CSV
# ==============================
output_path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged_Monthly_Data_Cleaned.csv"
df.to_csv(output_path, index=False, encoding='utf-8')
print(f"✅ Updated CSV saved at:\n{output_path}")
# Optional preview
print(df[['Country', 'Country_Full']].head())
✅ Updated CSV saved at:
C:\Users\hp\Desktop\WEEK 3_DSHub\Merged_Monthly_Data_Cleaned.csv
Country Country_Full
6171 BG Bulgaria
6172 BR Brazil
6173 DE Germany
6174 HU Hungary
6175 MY Malaysia
df.head()
| Date | Package name | Carrier | Daily Device Installs | Daily Device Uninstalls | Daily Device Upgrades | Total User Installs | Daily User Installs | Daily User Uninstalls | Active Device Installs | ... | Uninstall events | Month | Android OS Version | Country | Package Name | Daily Crashes | Daily ANRs | App Version Code | Source_File | Country_Full | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 6171 | 7/2/2024 | com.app.grow.greener.plants | NaN | 6.0 | 0.0 | 0.0 | 0.0 | 4.0 | 3.0 | 3.0 | ... | 4.0 | Install_july | NaN | BG | NaN | NaN | NaN | NaN | NaN | Bulgaria |
| 6172 | 7/2/2024 | com.app.grow.greener.plants | NaN | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | ... | 0.0 | Install_july | NaN | BR | NaN | NaN | NaN | NaN | NaN | Brazil |
| 6173 | 7/2/2024 | com.app.grow.greener.plants | NaN | 4.0 | 0.0 | 0.0 | 0.0 | 3.0 | 9.0 | 2.0 | ... | 9.0 | Install_july | NaN | DE | NaN | NaN | NaN | NaN | NaN | Germany |
| 6174 | 7/2/2024 | com.app.grow.greener.plants | NaN | 2.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | ... | 0.0 | Install_july | NaN | HU | NaN | NaN | NaN | NaN | NaN | Hungary |
| 6175 | 7/2/2024 | com.app.grow.greener.plants | NaN | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | ... | 0.0 | Install_july | NaN | MY | NaN | NaN | NaN | NaN | NaN | Malaysia |
5 rows × 22 columns
region-based usage distribution based on installation events
import pandas as pd
# Assuming your output DataFrame is named 'df'
# (replace 'df' with your actual DataFrame name)
output_path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged_Monthly_Data_Cleaned.csv" # 👈 change this to your preferred location
# Save the DataFrame to CSV
df.to_csv(output_path, index=False, encoding='utf-8')
print(f"✅ CSV file successfully saved to:\n{output_path}")
✅ CSV file successfully saved to: C:\Users\hp\Desktop\WEEK 3_DSHub\Merged_Monthly_Data_Cleaned.csv
df.head()
| country | country_full | install_events | Year_Month | Region | |
|---|---|---|---|---|---|
| 0 | BG | Bulgaria | 7.0 | 2024-02 | Other |
| 1 | BR | Brazil | 1.0 | 2024-02 | South America |
| 2 | DE | Germany | 11.0 | 2024-02 | Europe |
| 3 | HU | Hungary | 2.0 | 2024-02 | Other |
| 4 | MY | Malaysia | 1.0 | 2024-02 | Other |
# ===========================================
# STEP 1: Import Libraries
# ===========================================
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# ===========================================
# STEP 2: Load Dataset
# ===========================================
file_path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\July_data.csv"
df = pd.read_csv(file_path, encoding='utf-8', low_memory=False)
print("✅ File loaded. Shape:", df.shape)
# ===========================================
# STEP 3: Normalize Column Names
# ===========================================
df.columns = df.columns.str.strip().str.replace(" ", "_").str.lower()
# Check available columns
print("\n📋 Columns available:")
print(df.columns.tolist())
# ===========================================
# STEP 4: Diagnose the Date Column
# ===========================================
# Try to find the column that holds date information
possible_dates = [col for col in df.columns if 'date' in col]
print("\n🔍 Possible date columns:", possible_dates)
# If there's a 'date' column, inspect unique formats
date_col = possible_dates[0] if possible_dates else None
if not date_col:
raise ValueError("No 'date' column found. Please verify your dataset.")
print("\nSample date values:")
print(df[date_col].head(10))
# ===========================================
# STEP 5: Clean and Convert Date
# ===========================================
# Try to convert; if fails, coerce to NaT
df[date_col] = pd.to_datetime(df[date_col], errors='coerce', dayfirst=True)
# If some are still NaT, try parsing as Excel serial or text
if df[date_col].isna().sum() > 0:
try:
df[date_col] = pd.to_datetime(df[date_col].astype(float), origin='1899-12-30', unit='D')
except:
pass
# Drop rows without valid dates
df = df.dropna(subset=[date_col])
print(f"\n✅ Dates cleaned. Total valid rows: {len(df)}")
# ===========================================
# STEP 6: Extract Month and Year
# ===========================================
df['Year_Month'] = df[date_col].dt.to_period('M').astype(str)
print("\n📆 Unique months found after cleaning:")
print(df['Year_Month'].unique())
# ===========================================
# STEP 7: Clean and Prepare for Analysis
# ===========================================
# Keep only relevant columns
columns_needed = ['country', 'country_full', 'install_events', 'Year_Month']
available_cols = [col for col in columns_needed if col in df.columns]
df = df[available_cols]
# Convert installs to numeric
df['install_events'] = pd.to_numeric(df['install_events'], errors='coerce')
df = df.dropna(subset=['install_events'])
df = df[df['install_events'] > 0]
# ===========================================
# STEP 8: Region Mapping
# ===========================================
region_mapping = {
"Nigeria": "Africa", "Kenya": "Africa", "Ghana": "Africa", "South Africa": "Africa", "Egypt": "Africa",
"Germany": "Europe", "France": "Europe", "United Kingdom": "Europe", "Spain": "Europe", "Italy": "Europe",
"India": "Asia", "China": "Asia", "Japan": "Asia", "Indonesia": "Asia", "Philippines": "Asia", "Singapore": "Asia",
"United States": "North America", "Canada": "North America", "Mexico": "North America",
"Brazil": "South America", "Argentina": "South America", "Chile": "South America",
"Australia": "Oceania", "New Zealand": "Oceania"
}
df['Region'] = df['country_full'].map(region_mapping).fillna('Other')
# ===========================================
# STEP 9: Group by Month & Region
# ===========================================
monthly_region = df.groupby(['Year_Month', 'Region'])['install_events'].sum().reset_index()
# ===========================================
# STEP 10: Validate Month Coverage
# ===========================================
print("\n🗓️ Months appearing in grouped data:")
print(monthly_region['Year_Month'].unique())
# ===========================================
# STEP 11: Visualization
# ===========================================
plt.figure(figsize=(12, 6))
sns.lineplot(data=monthly_region, x='Year_Month', y='install_events', hue='Region', marker='o')
plt.title("July Install Events by Region")
plt.xlabel("Month")
plt.ylabel("Total Install Events")
plt.xticks(rotation=45)
plt.grid(True, linestyle='--', alpha=0.6)
plt.tight_layout()
plt.show()
# ===========================================
# STEP 12: Save Cleaned File
# ===========================================
output_path = r"C:\Users\hp\Desktop\WEEK 3_DSHub\Cleaned_Region_Trend_Analysis.csv"
df.to_csv(output_path, index=False)
print(f"\n✅ Cleaned data with proper dates saved to:\n{output_path}")
✅ File loaded. Shape: (2475, 22) 📋 Columns available: ['date', 'package_name', 'carrier', 'daily_device_installs', 'daily_device_uninstalls', 'daily_device_upgrades', 'total_user_installs', 'daily_user_installs', 'daily_user_uninstalls', 'active_device_installs', 'install_events', 'update_events', 'uninstall_events', 'month', 'android_os_version', 'country', 'package_name', 'daily_crashes', 'daily_anrs', 'app_version_code', 'source_file', 'country_full'] 🔍 Possible date columns: ['date', 'update_events'] Sample date values: 0 7/2/2024 1 7/2/2024 2 7/2/2024 3 7/2/2024 4 7/2/2024 5 7/2/2024 6 7/2/2024 7 7/3/2024 8 7/3/2024 9 7/3/2024 Name: date, dtype: object ✅ Dates cleaned. Total valid rows: 293 📆 Unique months found after cleaning: ['2024-02' '2024-03' '2024-04' '2024-05' '2024-06' '2024-07' '2024-08' '2024-09' '2024-10' '2024-11' '2024-12'] 🗓️ Months appearing in grouped data: ['2024-02' '2024-03' '2024-04' '2024-05' '2024-06' '2024-07' '2024-08' '2024-09' '2024-10' '2024-11' '2024-12']
✅ Cleaned data with proper dates saved to: C:\Users\hp\Desktop\WEEK 3_DSHub\Cleaned_Region_Trend_Analysis.csv
App Performance Analysis
import pandas as pd
import matplotlib.pyplot as plt
# === Load CSV files ===
df_2024 = pd.read_csv(r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\App_Performance_2024.csv")
df_2025 = pd.read_csv(r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\App_Performance_2025.csv")
# === Ensure Date column is datetime ===
df_2024['Date'] = pd.to_datetime(df_2024['Date'], errors='coerce')
df_2025['Date'] = pd.to_datetime(df_2025['Date'], errors='coerce')
# === Add Year & Month columns ===
df_2024['Year'] = df_2024['Date'].dt.year
df_2024['Month'] = df_2024['Date'].dt.month_name()
df_2025['Year'] = df_2025['Date'].dt.year
df_2025['Month'] = df_2025['Date'].dt.month_name()
# Confirm structure
print(df_2024.head())
print(df_2025.head())
Date Package Name Android OS Version Daily Crashes \
0 2024-07-05 com.app.grow.greener.plants NaN 0.0
1 2024-07-08 com.app.grow.greener.plants NaN 0.0
2 2024-07-10 com.app.grow.greener.plants NaN 1.0
3 2024-07-11 com.app.grow.greener.plants NaN 1.0
4 2024-07-15 com.app.grow.greener.plants NaN 0.0
Daily ANRs Month Device Year
0 1.0 July TECNO TECNO-KI7 (TECNO SPARK 10 Pro) 2024
1 1.0 July samsung a13 (Galaxy A13) 2024
2 0.0 July HONOR HNVNE-M (HONOR X6) 2024
3 0.0 July Lenovo X306X (Lenovo Tab M10 HD (2nd Gen)) 2024
4 1.0 July TECNO TECNO-KG6P (TECNO SPARK 8T) 2024
Date Package Name Android OS Version Daily Crashes \
0 2025-02-01 com.app.grow.greener.plants Android 6.0 0.0
1 2025-02-01 com.app.grow.greener.plants Android 7.1 15.0
2 2025-02-01 com.app.grow.greener.plants Android 8.0 2.0
3 2025-02-01 com.app.grow.greener.plants Android 8.1 1.0
4 2025-02-01 com.app.grow.greener.plants Android 9 13.0
Daily ANRs Month Device Year
0 8.0 February NaN 2025
1 6.0 February NaN 2025
2 1.0 February NaN 2025
3 6.0 February NaN 2025
4 8.0 February NaN 2025
import pandas as pd
import re
# Load your CSV file
df = pd.read_csv(r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\App_Performance_2024.csv")
# Define a function to clean and extract device names
def clean_device_name(device):
if pd.isna(device):
return "" # avoid NaN output
# Try to extract text within parentheses
match = re.search(r"\((.*?)\)", device)
if match:
cleaned = match.group(1).strip()
else:
# If no parentheses, use the part before or entire text
cleaned = device.strip()
# Remove extra spaces and ensure distinct clean format
cleaned = re.sub(r"\s+", " ", cleaned)
return cleaned
# Apply the function to the Device column
df["Cleaned_Device"] = df["Device"].apply(clean_device_name)
# Drop duplicates and reset index (optional, for analysis)
df = df.drop_duplicates(subset=["Cleaned_Device"]).reset_index(drop=True)
# Replace any remaining NaN values with blank strings
df["Cleaned_Device"] = df["Cleaned_Device"].fillna("")
# Save cleaned data to a new file
df.to_csv("Cleaned_Device_Data.csv", index=False)
print("✅ Cleaning complete! File saved as 'Cleaned_Device_Data.csv'")
print(df[["Device", "Cleaned_Device"]].head())
✅ Cleaning complete! File saved as 'Cleaned_Device_Data.csv'
Device Cleaned_Device
0 TECNO TECNO-KI7 (TECNO SPARK 10 Pro) TECNO SPARK 10 Pro
1 samsung a13 (Galaxy A13) Galaxy A13
2 HONOR HNVNE-M (HONOR X6) HONOR X6
3 Lenovo X306X (Lenovo Tab M10 HD (2nd Gen)) Lenovo Tab M10 HD (2nd Gen
4 TECNO TECNO-KG6P (TECNO SPARK 8T) TECNO SPARK 8T
import pandas as pd
import re
# Load your CSV file
df = pd.read_csv(r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\App_Performance_2025.csv")
# Define a function to clean and extract device names
def clean_device_name(device):
if pd.isna(device):
return "" # avoid NaN output
# Try to extract text within parentheses
match = re.search(r"\((.*?)\)", device)
if match:
cleaned = match.group(1).strip()
else:
# If no parentheses, use the part before or entire text
cleaned = device.strip()
# Remove extra spaces and ensure distinct clean format
cleaned = re.sub(r"\s+", " ", cleaned)
return cleaned
# Apply the function to the Device column
df["Cleaned_Device"] = df["Device"].apply(clean_device_name)
# Drop duplicates and reset index (optional, for analysis)
df = df.drop_duplicates(subset=["Cleaned_Device"]).reset_index(drop=True)
# Replace any remaining NaN values with blank strings
df["Cleaned_Device"] = df["Cleaned_Device"].fillna("")
# Save cleaned data to a new file
df.to_csv("Cleaned_Device_Data.csv", index=False)
print("✅ Cleaning complete! File saved as 'Cleaned_Device_Data.csv'")
print(df[["Device", "Cleaned_Device"]].head())
✅ Cleaning complete! File saved as 'Cleaned_Device_Data.csv'
Device Cleaned_Device
0 NaN
1 1902 1902
2 HWDUB-Q HWDUB-Q
3 Infinix-X6516 Infinix-X6516
4 Infinix-X665C Infinix-X665C
Retention/Chun
def retention_churn_analysis(df, year):
df = df.copy()
df = df.sort_values(by='Date')
# Group all devices by month
month_device = df.groupby('Month')['Device'].apply(set).reset_index()
summary = []
prev_devices = set()
for i, row in month_device.iterrows():
month = row['Month']
current_devices = row['Device']
new_users = len(current_devices - prev_devices)
retained_users = len(current_devices & prev_devices)
churned_users = len(prev_devices - current_devices)
total_active = len(current_devices)
retention_rate = (retained_users / (retained_users + new_users) * 100) if (retained_users + new_users) > 0 else 0
churn_rate = (churned_users / (len(prev_devices) + 1e-9) * 100) if len(prev_devices) > 0 else 0
summary.append({
'Year': year,
'Month': month,
'Total Active Devices': total_active,
'New Devices': new_users,
'Retained Devices': retained_users,
'Churned Devices': churned_users,
'Retention Rate (%)': round(retention_rate, 2),
'Churn Rate (%)': round(churn_rate, 2)
})
prev_devices = current_devices
return pd.DataFrame(summary)
retention_2024 = retention_churn_analysis(df_2024, 2024)
retention_2025 = retention_churn_analysis(df_2025, 2025)
# Combine summaries
combined_retention = pd.concat([retention_2024, retention_2025])
combined_retention.reset_index(drop=True, inplace=True)
combined_retention
| Year | Month | Total Active Devices | New Devices | Retained Devices | Churned Devices | Retention Rate (%) | Churn Rate (%) | |
|---|---|---|---|---|---|---|---|---|
| 0 | 2024 | July | 782 | 782 | 0 | 0 | 0.0 | 0.0 |
| 1 | 2025 | February | 1 | 1 | 0 | 0 | 0.0 | 0.0 |
| 2 | 2025 | May | 156 | 156 | 0 | 1 | 0.0 | 100.0 |
df_2024 = pd.read_csv(r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\App_Performance_2024.csv", encoding='utf-8', parse_dates=['Date'], dayfirst=True)
df_2025 = pd.read_csv(r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\App_Performance_2025.csv", encoding='utf-8', parse_dates=['Date'], dayfirst=True)
print(df_2024['Date'].head(10))
0 2024-07-05 1 2024-07-08 2 2024-07-10 3 2024-07-11 4 2024-07-15 5 2024-07-15 6 2024-07-15 7 2024-07-15 8 2024-07-15 9 2024-07-15 Name: Date, dtype: object
monthly-level (July) retention/churn analysis based on Device activity
import pandas as pd
df_2024 = pd.read_csv(
r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\App_Performance_2024.csv",
encoding='utf-8'
)
# Show the first 10 unique date entries and column names
print("📋 Columns:", df_2024.columns.tolist())
print("\n🔍 Sample date values:")
print(df_2024['Date'].head(10))
📋 Columns: ['Date', 'Package Name', 'Android OS Version', 'Daily Crashes', 'Daily ANRs', 'Month', 'Device', 'Year'] 🔍 Sample date values: 0 2024-07-05 1 2024-07-08 2 2024-07-10 3 2024-07-11 4 2024-07-15 5 2024-07-15 6 2024-07-15 7 2024-07-15 8 2024-07-15 9 2024-07-15 Name: Date, dtype: object
import pandas as pd
import matplotlib.pyplot as plt
# Load dataset
df_2024 = pd.read_csv(
r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\App_Performance_2024.csv",
encoding='utf-8'
)
# Convert Date column to datetime
df_2024['Date'] = pd.to_datetime(df_2024['Date'], errors='coerce')
# Filter only July 2024 data
df_july_2024 = df_2024[(df_2024['Date'].dt.month == 7) & (df_2024['Date'].dt.year == 2024)]
# Drop missing or invalid device entries
df_july_2024 = df_july_2024.dropna(subset=['Device'])
# Calculate active devices per day
daily_devices = df_july_2024.groupby('Date')['Device'].nunique().reset_index(name='Active_Devices')
# Calculate Retention Rate (% change from previous day)
daily_devices['Retention_Rate_%'] = daily_devices['Active_Devices'].pct_change().fillna(0) * 100
# Calculate simple churn (negative of retention if drop)
daily_devices['Churn_Rate_%'] = daily_devices['Retention_Rate_%'].apply(lambda x: abs(x) if x < 0 else 0)
# Display summary
print("\n Daily Retention & Churn (July 2024):")
print(daily_devices.describe())
# Plot
plt.figure(figsize=(10,6))
plt.plot(daily_devices['Date'], daily_devices['Active_Devices'], marker='o', label='Active Devices', linewidth=2)
plt.title(" User Retention & Device Activity - July 2024")
plt.xlabel("Date")
plt.ylabel("Number of Active Devices")
plt.xticks(rotation=45)
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()
Daily Retention & Churn (July 2024):
Date Active_Devices Retention_Rate_% \
count 21 21.000000 21.000000
mean 2024-07-20 05:42:51.428571392 133.476190 56.532700
min 2024-07-05 00:00:00 1.000000 -11.173184
25% 2024-07-16 00:00:00 31.000000 -7.291667
50% 2024-07-21 00:00:00 178.000000 0.000000
75% 2024-07-26 00:00:00 195.000000 19.018405
max 2024-07-31 00:00:00 238.000000 500.000000
std NaN 87.277499 141.029997
Churn_Rate_%
count 21.000000
mean 2.763988
min 0.000000
25% 0.000000
50% 0.000000
75% 7.291667
max 11.173184
std 4.323564
import pandas as pd
import matplotlib.pyplot as plt
# Load 2025 dataset
df_2025 = pd.read_csv(
r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\App_Performance_2025.csv",
encoding='utf-8'
)
# Convert 'Date' to datetime (format: Month-Day-Year)
df_2025['Date'] = pd.to_datetime(df_2025['Date'], errors='coerce')
# Drop invalid dates and devices
df_2025 = df_2025.dropna(subset=['Date', 'Device'])
# Add Month-Year column
df_2025['YearMonth'] = df_2025['Date'].dt.to_period('M')
print(" Unique Year-Months in 2025 data:", df_2025['YearMonth'].unique())
# Group by Date to count active devices per day
daily_devices_2025 = df_2025.groupby('Date')['Device'].nunique().reset_index(name='Active_Devices')
# Compute Retention and Churn rates
daily_devices_2025['Retention_Rate_%'] = daily_devices_2025['Active_Devices'].pct_change().fillna(0) * 100
daily_devices_2025['Churn_Rate_%'] = daily_devices_2025['Retention_Rate_%'].apply(lambda x: abs(x) if x < 0 else 0)
# --- Statistics Summary ---
print("\n Daily Retention & Churn Statistics (2025):")
print(daily_devices_2025.describe())
# --- Visualization ---
plt.figure(figsize=(10,6))
plt.plot(daily_devices_2025['Date'], daily_devices_2025['Active_Devices'], marker='o', label='Active Devices', linewidth=2, color='blue')
plt.title(" User Retention & Device Activity - 2025 ( May)")
plt.xlabel("Date")
plt.ylabel("Number of Active Devices")
plt.xticks(rotation=45)
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()
Unique Year-Months in 2025 data: <PeriodArray>
['2025-05']
Length: 1, dtype: period[M]
Daily Retention & Churn Statistics (2025):
Date Active_Devices Retention_Rate_% Churn_Rate_%
count 11 11.000000 11.000000 11.000000
mean 2025-05-06 00:00:00 25.454545 15.118602 6.090092
min 2025-05-01 00:00:00 10.000000 -41.176471 0.000000
25% 2025-05-03 12:00:00 17.500000 -7.644110 0.000000
50% 2025-05-06 00:00:00 19.000000 1.923077 0.000000
75% 2025-05-08 12:00:00 27.500000 34.803922 7.644110
max 2025-05-11 00:00:00 53.000000 80.000000 41.176471
std NaN 14.514569 37.347213 12.376602
APP PERFORMANCE ANALYSIS
import pandas as pd
import matplotlib.pyplot as plt
# Load dataset
df_2024 = pd.read_csv(
r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\App_Performance_2024.csv",
encoding='utf-8'
)
# Convert Date column to datetime
df_2024['Date'] = pd.to_datetime(df_2024['Date'], errors='coerce')
# Filter July 2024
df_july_2024 = df_2024[(df_2024['Date'].dt.month == 7) & (df_2024['Date'].dt.year == 2024)]
# Group by Date and summarize performance
stability_stats = df_july_2024.groupby('Date').agg({
'Daily ANRs': 'sum',
'Daily Crashes': 'sum',
'Device': 'nunique'
}).reset_index()
stability_stats['Crash_Rate_%'] = (stability_stats['Daily Crashes'] / stability_stats['Device']) * 100
stability_stats['ANR_Rate_%'] = (stability_stats['Daily ANRs'] / stability_stats['Device']) * 100
# Display summary statistics
print("\n📊 App Performance Metrics (July 2024):")
print(stability_stats.describe())
# Plot trends
plt.figure(figsize=(10,6))
plt.plot(stability_stats['Date'], stability_stats['Crash_Rate_%'], marker='o', label='Crash Rate (%)', color='red')
plt.plot(stability_stats['Date'], stability_stats['ANR_Rate_%'], marker='o', label='ANR Rate (%)', color='orange')
plt.title(" App Performance(Daily) - July 2024")
plt.xlabel("Date")
plt.ylabel("Rate (%)")
plt.xticks(rotation=45)
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()
📊 App Performance Metrics (July 2024):
Date Daily ANRs Daily Crashes Device \
count 21 21.000000 21.000000 21.000000
mean 2024-07-20 05:42:51.428571392 113.047619 162.904762 133.476190
min 2024-07-05 00:00:00 0.000000 0.000000 1.000000
25% 2024-07-16 00:00:00 27.000000 32.000000 31.000000
50% 2024-07-21 00:00:00 141.000000 199.000000 178.000000
75% 2024-07-26 00:00:00 177.000000 254.000000 195.000000
max 2024-07-31 00:00:00 224.000000 386.000000 238.000000
std NaN 76.494756 116.267753 87.277499
Crash_Rate_% ANR_Rate_%
count 21.000000 21.000000
mean 101.192355 78.437533
min 0.000000 0.000000
25% 100.000000 79.354839
50% 110.695187 85.937500
75% 126.380368 88.995215
max 162.184874 100.000000
std 45.703656 27.407571
import pandas as pd
import matplotlib.pyplot as plt
# Load dataset again for clarity
df_2025 = pd.read_csv(
r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\App_Performance_2025.csv",
encoding='utf-8'
)
# Convert Date column
df_2025['Date'] = pd.to_datetime(df_2025['Date'], errors='coerce')
# Filter only valid data
df_2025 = df_2025.dropna(subset=['Date', 'Device'])
# --- Group by Date and compute metrics ---
stability_stats_2025 = df_2025.groupby('Date').agg({
'Daily ANRs': 'sum',
'Daily Crashes': 'sum',
'Device': 'nunique'
}).reset_index()
# Compute performance rates
stability_stats_2025['Crash_Rate_%'] = (stability_stats_2025['Daily Crashes'] / stability_stats_2025['Device']) * 100
stability_stats_2025['ANR_Rate_%'] = (stability_stats_2025['Daily ANRs'] / stability_stats_2025['Device']) * 100
# --- Statistics Summary ---
print("\n App Performance Statistics (2025):")
print(stability_stats_2025.describe())
# --- Visualization ---
plt.figure(figsize=(10,6))
plt.plot(stability_stats_2025['Date'], stability_stats_2025['Crash_Rate_%'], marker='o', label='Crash Rate (%)', color='red')
plt.plot(stability_stats_2025['Date'], stability_stats_2025['ANR_Rate_%'], marker='o', label='ANR Rate (%)', color='orange')
plt.title(" App Performance(Daily) - 2025 ( May)")
plt.xlabel("Date")
plt.ylabel("Rate (%)")
plt.xticks(rotation=45)
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()
App Performance Statistics (2025):
Date Daily ANRs Daily Crashes Device \
count 11 11.000000 11.000000 11.000000
mean 2025-05-06 00:00:00 20.454545 27.818182 25.454545
min 2025-05-01 00:00:00 11.000000 3.000000 10.000000
25% 2025-05-03 12:00:00 15.500000 10.000000 17.500000
50% 2025-05-06 00:00:00 18.000000 12.000000 19.000000
75% 2025-05-08 12:00:00 25.500000 36.000000 27.500000
max 2025-05-11 00:00:00 34.000000 86.000000 53.000000
std NaN 8.430464 31.250018 14.514569
Crash_Rate_% ANR_Rate_%
count 11.000000 11.000000
mean 84.849635 95.987084
min 30.000000 28.846154
25% 52.661064 65.408805
50% 61.111111 85.714286
75% 116.832175 110.882353
max 167.647059 188.235294
std 51.398566 48.984473
DASHBOARDS
#Retention & Churn Dashboard( July 2024)
# ======================================
# USER RETENTION VS CHURN ANALYSIS
# Dataset: Final_review_analysis_cleaned.csv
# ======================================
import pandas as pd
import numpy as np
import plotly.express as px
# -------------------------------
# LOAD AND CLEAN DATA
# -------------------------------
# Load CSV
df = pd.read_csv(r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\Final_Review_Analysis_Cleaned.csv")
# Keep only relevant columns
df = df[['Device', 'Review Submit Date and Time', 'Review Last Update Date and Time', 'Star Rating']]
# Rename columns for simplicity
df.columns = ['Device', 'Submit_Date', 'Update_Date', 'Star_Rating']
# Clean & convert date columns to datetime
df['Submit_Date'] = pd.to_datetime(df['Submit_Date'], errors='coerce')
df['Update_Date'] = pd.to_datetime(df['Update_Date'], errors='coerce')
# Drop rows with invalid or missing dates
df = df.dropna(subset=['Submit_Date', 'Update_Date'])
# Extract only the date part (no time)
df['Submit_Date'] = df['Submit_Date'].dt.date
df['Update_Date'] = df['Update_Date'].dt.date
print(" Data cleaned successfully!")
print("Columns:", df.columns.tolist())
print("\nSample:")
print(df.head())
# -------------------------------
# DEFINE ACTIVE vs INACTIVE USERS
# -------------------------------
# Rule:
# If a device has a review update on a given day → Active
# If a device has no update activity for 7+ days → Inactive
# Create daily activity summary
daily_activity = df.groupby('Update_Date')['Device'].nunique().reset_index()
daily_activity.columns = ['Date', 'Active_Users']
# Sort by date
daily_activity = daily_activity.sort_values('Date')
# Compute churn (drop in active users)
daily_activity['Previous_Active'] = daily_activity['Active_Users'].shift(1)
daily_activity['Churn'] = daily_activity['Previous_Active'] - daily_activity['Active_Users']
# Any negative churn (growth) should be 0
daily_activity['Churn'] = daily_activity['Churn'].apply(lambda x: x if x > 0 else 0)
# Calculate retention rate (percentage of users retained)
daily_activity['Retention_Rate'] = (
daily_activity['Active_Users'] / daily_activity['Previous_Active'] * 100
).round(2)
# Fill first day NA values
daily_activity.fillna({'Previous_Active': 0, 'Churn': 0, 'Retention_Rate': 100}, inplace=True)
# -------------------------------
# ADD ACTIVE/INACTIVE USER STATUS
# -------------------------------
# Approximation: inactive users = total devices - active on that day
total_devices = df['Device'].nunique()
daily_activity['Inactive_Users'] = total_devices - daily_activity['Active_Users']
print("\n Daily Retention & Churn Data:")
print(daily_activity.head())
# -------------------------------
# VISUALIZE WITH DASHBOARD-LIKE PLOTS
# -------------------------------
# Plot 1: Active vs Inactive Users
fig_activity = px.line(
daily_activity,
x='Date',
y=['Active_Users', 'Inactive_Users'],
title=' Daily Active vs Inactive Users',
markers=True
)
fig_activity.update_layout(xaxis_title='Date', yaxis_title='User Count')
fig_activity.show()
# Plot 2: Retention and Churn Trends
fig_retention = px.bar(
daily_activity,
x='Date',
y='Retention_Rate',
title=' Daily Retention Rate (%)',
color='Retention_Rate',
color_continuous_scale='Viridis'
)
fig_retention.update_layout(xaxis_title='Date', yaxis_title='Retention (%)')
fig_retention.show()
fig_churn = px.line(
daily_activity,
x='Date',
y='Churn',
title=' Daily User Churn Count',
markers=True,
line_shape='spline'
)
fig_churn.update_layout(xaxis_title='Date', yaxis_title='Churn Count')
fig_churn.show()
# -------------------------------
# STATISTICAL SUMMARY
# -------------------------------
print("\n Statistical Summary:")
print(daily_activity[['Active_Users', 'Inactive_Users', 'Churn', 'Retention_Rate']].describe())
print(f"\nTotal Unique Devices: {total_devices}")
Data cleaned successfully!
Columns: ['Device', 'Submit_Date', 'Update_Date', 'Star_Rating']
Sample:
Device Submit_Date Update_Date Star_Rating
0 RE588E 2024-07-02 2024-07-02 1.0
1 RMX1993L1 2024-07-02 2024-07-02 1.0
2 a04e 2024-07-05 2024-07-05 5.0
3 mdh15lm 2024-07-08 2024-07-08 5.0
4 a10 2024-07-15 2024-07-15 5.0
Daily Retention & Churn Data:
Date Active_Users Previous_Active Churn Retention_Rate \
0 2024-07-02 2 0.0 0.0 100.00
1 2024-07-05 1 2.0 1.0 50.00
2 2024-07-08 1 1.0 0.0 100.00
3 2024-07-15 14 1.0 0.0 1400.00
4 2024-07-16 50 14.0 0.0 357.14
Inactive_Users
0 473
1 474
2 474
3 461
4 425
Statistical Summary:
Active_Users Inactive_Users Churn Retention_Rate
count 10.000000 10.000000 10.000000 10.00000
mean 77.500000 397.500000 12.600000 262.08800
std 77.386835 77.386835 31.690167 410.21605
min 1.000000 279.000000 0.000000 41.52000
25% 5.000000 323.250000 0.000000 90.43000
50% 60.500000 414.500000 0.000000 107.99000
75% 151.750000 470.000000 0.750000 192.25000
max 196.000000 474.000000 100.000000 1400.00000
Total Unique Devices: 475
FRUT GARDEN INSIGHT OVERVIEW DASHBOARD
pip install dash plotly pandas
Requirement already satisfied: dash in c:\users\hp\appdata\local\anaconda3\lib\site-packages (3.2.0) Requirement already satisfied: plotly in c:\users\hp\appdata\local\anaconda3\lib\site-packages (5.24.1) Requirement already satisfied: pandas in c:\users\hp\appdata\local\anaconda3\lib\site-packages (2.3.3) Requirement already satisfied: Flask<3.2,>=1.0.4 in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from dash) (3.1.0) Requirement already satisfied: Werkzeug<3.2 in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from dash) (3.1.3) Requirement already satisfied: importlib-metadata in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from dash) (8.5.0) Requirement already satisfied: typing-extensions>=4.1.1 in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from dash) (4.12.2) Requirement already satisfied: requests in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from dash) (2.32.3) Requirement already satisfied: retrying in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from dash) (1.4.2) Requirement already satisfied: nest-asyncio in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from dash) (1.6.0) Requirement already satisfied: setuptools in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from dash) (72.1.0) Requirement already satisfied: Jinja2>=3.1.2 in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from Flask<3.2,>=1.0.4->dash) (3.1.6) Requirement already satisfied: itsdangerous>=2.2 in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from Flask<3.2,>=1.0.4->dash) (2.2.0) Requirement already satisfied: click>=8.1.3 in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from Flask<3.2,>=1.0.4->dash) (8.1.8) Requirement already satisfied: blinker>=1.9 in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from Flask<3.2,>=1.0.4->dash) (1.9.0) Requirement already satisfied: MarkupSafe>=2.1.1 in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from Werkzeug<3.2->dash) (3.0.2) Requirement already satisfied: tenacity>=6.2.0 in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from plotly) (9.0.0) Requirement already satisfied: packaging in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from plotly) (24.2) Requirement already satisfied: numpy>=1.26.0 in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from pandas) (2.1.3) Requirement already satisfied: python-dateutil>=2.8.2 in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from pandas) (2.9.0.post0) Requirement already satisfied: pytz>=2020.1 in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from pandas) (2024.1) Requirement already satisfied: tzdata>=2022.7 in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from pandas) (2025.2) Requirement already satisfied: colorama in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from click>=8.1.3->Flask<3.2,>=1.0.4->dash) (0.4.6) Requirement already satisfied: six>=1.5 in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from python-dateutil>=2.8.2->pandas) (1.17.0) Requirement already satisfied: zipp>=3.20 in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from importlib-metadata->dash) (3.21.0) Requirement already satisfied: charset-normalizer<4,>=2 in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from requests->dash) (3.3.2) Requirement already satisfied: idna<4,>=2.5 in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from requests->dash) (3.7) Requirement already satisfied: urllib3<3,>=1.21.1 in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from requests->dash) (2.3.0) Requirement already satisfied: certifi>=2017.4.17 in c:\users\hp\appdata\local\anaconda3\lib\site-packages (from requests->dash) (2025.10.5) Note: you may need to restart the kernel to use updated packages.
import pandas as pd
import plotly.express as px
from dash import Dash, html, dcc
# =============================
# Load and prepare 2024 dataset
# =============================
df_2024 = pd.read_csv(
r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\App_Performance_2024.csv",
encoding='utf-8'
)
df_2024['Date'] = pd.to_datetime(df_2024['Date'], errors='coerce')
df_2024 = df_2024.dropna(subset=['Date', 'Device'])
# --- July 2024 Retention ---
df_july_2024 = df_2024[(df_2024['Date'].dt.month == 7) & (df_2024['Date'].dt.year == 2024)]
daily_devices_2024 = df_july_2024.groupby('Date')['Device'].nunique().reset_index(name='Active_Devices')
daily_devices_2024['Retention_Rate_%'] = daily_devices_2024['Active_Devices'].pct_change().fillna(0) * 100
daily_devices_2024['Churn_Rate_%'] = daily_devices_2024['Retention_Rate_%'].apply(lambda x: abs(x) if x < 0 else 0)
# --- July 2024 Stability ---
stability_stats = df_july_2024.groupby('Date').agg({
'Daily ANRs': 'sum',
'Daily Crashes': 'sum',
'Device': 'nunique'
}).reset_index()
stability_stats['Crash_Rate_%'] = (stability_stats['Daily Crashes'] / stability_stats['Device']) * 100
stability_stats['ANR_Rate_%'] = (stability_stats['Daily ANRs'] / stability_stats['Device']) * 100
# =============================
# Load and prepare 2025 dataset
# =============================
df_2025 = pd.read_csv(
r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\App_Performance_2025.csv",
encoding='utf-8'
)
df_2025['Date'] = pd.to_datetime(df_2025['Date'], errors='coerce')
df_2025 = df_2025.dropna(subset=['Date', 'Device'])
daily_devices_2025 = df_2025.groupby('Date')['Device'].nunique().reset_index(name='Active_Devices')
daily_devices_2025['Retention_Rate_%'] = daily_devices_2025['Active_Devices'].pct_change().fillna(0) * 100
daily_devices_2025['Churn_Rate_%'] = daily_devices_2025['Retention_Rate_%'].apply(lambda x: abs(x) if x < 0 else 0)
# =============================
# Create Visualizations
# =============================
fig1 = px.line(
daily_devices_2024,
x='Date', y='Active_Devices',
title=" Active Devices - July 2024",
markers=True,
line_shape='linear',
color_discrete_sequence=['#1f77b4']
)
fig1.update_traces(line=dict(width=4))
fig2 = px.line(
stability_stats,
x='Date', y=['Crash_Rate_%', 'ANR_Rate_%'],
title="⚙️ App Stability Metrics - July 2024",
markers=True,
color_discrete_sequence=['#d62728', '#ff7f0e']
)
fig2.update_traces(line=dict(width=4))
fig3 = px.line(
daily_devices_2025,
x='Date', y='Active_Devices',
title="📈 Active Devices - 2025 (All Months)",
markers=True,
color_discrete_sequence=['#2ca02c']
)
fig3.update_traces(line=dict(width=4))
# =============================
# Build Dashboard Layout
# =============================
app = Dash(__name__)
app.title = "App Performance Dashboard"
app.layout = html.Div([
html.H1(" App Performance Dashboard", style={'textAlign': 'center', 'color': '#003366'}),
html.Div([
html.H3("User Retention & Activity (2024 July)", style={'color': '#1f77b4'}),
dcc.Graph(figure=fig1)
], style={'margin': '20px'}),
html.Div([
html.H3("App Stability (Crash & ANR Rates)", style={'color': '#d62728'}),
dcc.Graph(figure=fig2)
], style={'margin': '20px'}),
html.Div([
html.H3("User Retention & Activity (2025)", style={'color': '#2ca02c'}),
dcc.Graph(figure=fig3)
], style={'margin': '20px'})
])
# =============================
# Run the Dashboard
# =============================
if __name__ == "__main__":
app.run(debug=True)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')
# Set style
sns.set_style("whitegrid")
plt.rcParams['figure.dpi'] = 300
print("="*70)
print("BATCH 1: DATA LOADING & TIME SERIES ANALYSIS")
print("="*70)
# Load the data
df = pd.read_csv(r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\Combined_App_Performance.csv")
# Check available columns
print("\nAvailable columns in dataset:")
print(df.columns.tolist())
# Strip whitespace from column names
df.columns = df.columns.str.strip()
# Identify the columns we need
required_base = ['Daily ANRs', 'Android OS Version', 'Daily Crashes']
date_col = None
device_col = None
# Find date column
for col in df.columns:
if 'date' in col.lower():
date_col = col
break
# Find device column
for col in df.columns:
if 'device' in col.lower():
device_col = col
break
print(f"\nIdentified columns:")
print(f" Date column: {date_col}")
print(f" Device column: {device_col}")
print(f" Required columns: {required_base}")
# Select only needed columns
columns_to_keep = required_base.copy()
if date_col:
columns_to_keep.insert(0, date_col)
if device_col:
columns_to_keep.append(device_col)
# Keep only required columns and drop the rest
df = df[columns_to_keep].copy()
print(f"\nColumns retained for analysis: {df.columns.tolist()}")
print(f"Original dataset size: {len(df):,} records")
# Data preprocessing
if date_col:
df[date_col] = pd.to_datetime(df[date_col], errors='coerce')
df = df.dropna(subset=[date_col])
df = df.rename(columns={date_col: 'Date'})
df['Daily Crashes'] = pd.to_numeric(df['Daily Crashes'], errors='coerce')
df['Daily ANRs'] = pd.to_numeric(df['Daily ANRs'], errors='coerce')
df = df.dropna(subset=['Daily Crashes', 'Daily ANRs'])
if device_col:
df = df.rename(columns={device_col: 'Device'})
print(f"Cleaned dataset size: {len(df):,} records")
print(f"\nData date range: {df['Date'].min()} to {df['Date'].max()}")
# Display sample of cleaned data
print("\nSample of cleaned data (first 5 rows):")
print(df.head())
print("\nBasic statistics:")
print(df[['Daily Crashes', 'Daily ANRs']].describe())
# ===========================================================================
# A. HIGH-LEVEL STABILITY METRICS
# ===========================================================================
print("\n" + "="*70)
print("A. HIGH-LEVEL STABILITY METRICS")
print("="*70)
# 1. TIME SERIES ANALYSIS: Daily Crashes and ANRs aggregated by date
print("\n1. TIME SERIES ANALYSIS - Daily Crashes and ANRs")
print("-" * 70)
daily_agg = df.groupby('Date').agg({
'Daily Crashes': 'sum',
'Daily ANRs': 'sum'
}).reset_index().sort_values('Date')
print(f"\nTime Period: {daily_agg['Date'].min().strftime('%Y-%m-%d')} to {daily_agg['Date'].max().strftime('%Y-%m-%d')}")
print(f"Total Days: {len(daily_agg)}")
print(f"\nTotal Crashes (entire period): {daily_agg['Daily Crashes'].sum():,.0f}")
print(f"Total ANRs (entire period): {daily_agg['Daily ANRs'].sum():,.0f}")
print(f"Average Daily Crashes: {daily_agg['Daily Crashes'].mean():.2f}")
print(f"Average Daily ANRs: {daily_agg['Daily ANRs'].mean():.2f}")
print(f"Max Daily Crashes: {daily_agg['Daily Crashes'].max():.0f} on {daily_agg.loc[daily_agg['Daily Crashes'].idxmax(), 'Date'].strftime('%Y-%m-%d')}")
print(f"Max Daily ANRs: {daily_agg['Daily ANRs'].max():.0f} on {daily_agg.loc[daily_agg['Daily ANRs'].idxmax(), 'Date'].strftime('%Y-%m-%d')}")
# Create time series visualization
fig, axes = plt.subplots(2, 1, figsize=(15, 9))
# Crashes over time
axes[0].plot(daily_agg['Date'], daily_agg['Daily Crashes'],
marker='o', linewidth=2.5, markersize=5, color='#e74c3c',
markerfacecolor='white', markeredgewidth=2)
axes[0].fill_between(daily_agg['Date'], daily_agg['Daily Crashes'],
alpha=0.3, color='#e74c3c')
axes[0].set_title('Daily Crashes Over Time (2024-2025)',
fontsize=15, fontweight='bold', pad=15)
axes[0].set_ylabel('Total Daily Crashes', fontsize=12, fontweight='bold')
axes[0].grid(True, alpha=0.4, linestyle='--')
axes[0].tick_params(axis='x', rotation=45, labelsize=10)
axes[0].tick_params(axis='y', labelsize=10)
# Add mean line
mean_crashes = daily_agg['Daily Crashes'].mean()
axes[0].axhline(y=mean_crashes, color='red', linestyle='--',
linewidth=2, alpha=0.7, label=f'Mean: {mean_crashes:.1f}')
axes[0].legend(loc='upper right', fontsize=10)
# ANRs over time
axes[1].plot(daily_agg['Date'], daily_agg['Daily ANRs'],
marker='s', linewidth=2.5, markersize=5, color='#f39c12',
markerfacecolor='white', markeredgewidth=2)
axes[1].fill_between(daily_agg['Date'], daily_agg['Daily ANRs'],
alpha=0.3, color='#f39c12')
axes[1].set_title('Daily ANRs Over Time (2024-2025)',
fontsize=15, fontweight='bold', pad=15)
axes[1].set_xlabel('Date', fontsize=12, fontweight='bold')
axes[1].set_ylabel('Total Daily ANRs', fontsize=12, fontweight='bold')
axes[1].grid(True, alpha=0.4, linestyle='--')
axes[1].tick_params(axis='x', rotation=45, labelsize=10)
axes[1].tick_params(axis='y', labelsize=10)
# Add mean line
mean_anrs = daily_agg['Daily ANRs'].mean()
axes[1].axhline(y=mean_anrs, color='orange', linestyle='--',
linewidth=2, alpha=0.7, label=f'Mean: {mean_anrs:.1f}')
axes[1].legend(loc='upper right', fontsize=10)
plt.tight_layout()
plt.savefig('batch1_time_series_crashes_anrs.png', dpi=300, bbox_inches='tight')
print("\n✓ Time series chart saved: batch1_time_series_crashes_anrs.png")
plt.close()
# Monthly breakdown
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.month
df['YearMonth'] = df['Date'].dt.to_period('M')
monthly_summary = df.groupby('YearMonth').agg({
'Daily Crashes': 'sum',
'Daily ANRs': 'sum'
}).reset_index()
monthly_summary['YearMonth'] = monthly_summary['YearMonth'].astype(str)
print("\n\nMonthly Summary:")
print(monthly_summary.to_string(index=False))
print("\n" + "="*70)
print("BATCH 1 COMPLETE - Time Series Analysis Done")
print("="*70)
======================================================================
BATCH 1: DATA LOADING & TIME SERIES ANALYSIS
======================================================================
Available columns in dataset:
['Date', 'Package Name', 'Android OS Version', 'Daily Crashes', 'Daily ANRs', 'Month', 'Device']
Identified columns:
Date column: Date
Device column: Device
Required columns: ['Daily ANRs', 'Android OS Version', 'Daily Crashes']
Columns retained for analysis: ['Date', 'Daily ANRs', 'Android OS Version', 'Daily Crashes', 'Device']
Original dataset size: 3,392 records
Cleaned dataset size: 3,391 records
Data date range: 2024-07-05 00:00:00 to 2025-05-11 00:00:00
Sample of cleaned data (first 5 rows):
Date Daily ANRs Android OS Version Daily Crashes \
0 2025-02-01 8.0 Android 6.0 0.0
1 2025-02-01 6.0 Android 7.1 15.0
2 2025-02-01 1.0 Android 8.0 2.0
3 2025-02-01 6.0 Android 8.1 1.0
4 2025-02-01 8.0 Android 9 13.0
Device
0 TECNO TECNO-KI7 (TECNO SPARK 10 Pro)
1 samsung a13 (Galaxy A13)
2 HONOR HNVNE-M (HONOR X6)
3 Lenovo X306X (Lenovo Tab M10 HD (2nd Gen))
4 TECNO TECNO-KG6P (TECNO SPARK 8T)
Basic statistics:
Daily Crashes Daily ANRs
count 3391.00000 3391.000000
mean 2.04335 2.695960
std 5.03289 13.818733
min 0.00000 0.000000
25% 0.00000 0.000000
50% 1.00000 1.000000
75% 2.00000 1.000000
max 77.00000 185.000000
======================================================================
A. HIGH-LEVEL STABILITY METRICS
======================================================================
1. TIME SERIES ANALYSIS - Daily Crashes and ANRs
----------------------------------------------------------------------
Time Period: 2024-07-05 to 2025-05-11
Total Days: 60
Total Crashes (entire period): 6,929
Total ANRs (entire period): 9,142
Average Daily Crashes: 115.48
Average Daily ANRs: 152.37
Max Daily Crashes: 386 on 2024-07-25
Max Daily ANRs: 368 on 2025-02-22
✓ Time series chart saved: batch1_time_series_crashes_anrs.png
Monthly Summary:
YearMonth Daily Crashes Daily ANRs
2024-07 3421.0 2374.0
2025-02 3202.0 6543.0
2025-05 306.0 225.0
======================================================================
BATCH 1 COMPLETE - Time Series Analysis Done
======================================================================
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Set style
sns.set_style("whitegrid")
plt.rcParams['figure.dpi'] = 300
# Load and prepare data
df = pd.read_csv(r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\Combined_App_Performance.csv")
df.columns = df.columns.str.strip()
# Find date column
date_col = [col for col in df.columns if 'date' in col.lower()][0]
# Keep required columns
df = df[['Daily ANRs', 'Android OS Version', 'Daily Crashes', date_col]].copy()
# Clean data
df[date_col] = pd.to_datetime(df[date_col], errors='coerce')
df = df.rename(columns={date_col: 'Date'})
df['Daily Crashes'] = pd.to_numeric(df['Daily Crashes'], errors='coerce')
df['Daily ANRs'] = pd.to_numeric(df['Daily ANRs'], errors='coerce')
df = df.dropna(subset=['Date', 'Daily Crashes', 'Daily ANRs'])
# Aggregate by date
daily_agg = df.groupby('Date').agg({
'Daily Crashes': 'sum',
'Daily ANRs': 'sum'
}).reset_index().sort_values('Date')
# PLOT: Time Series Chart
fig, axes = plt.subplots(2, 1, figsize=(15, 9))
# Crashes over time
axes[0].plot(daily_agg['Date'], daily_agg['Daily Crashes'],
marker='o', linewidth=2.5, markersize=5, color='#e74c3c',
markerfacecolor='white', markeredgewidth=2)
axes[0].fill_between(daily_agg['Date'], daily_agg['Daily Crashes'],
alpha=0.3, color='#e74c3c')
axes[0].set_title('Daily Crashes Over Time (2024-2025)',
fontsize=15, fontweight='bold', pad=15)
axes[0].set_ylabel('Total Daily Crashes', fontsize=12, fontweight='bold')
axes[0].grid(True, alpha=0.4, linestyle='--')
axes[0].tick_params(axis='x', rotation=45, labelsize=10)
axes[0].tick_params(axis='y', labelsize=10)
# Add mean line
mean_crashes = daily_agg['Daily Crashes'].mean()
axes[0].axhline(y=mean_crashes, color='red', linestyle='--',
linewidth=2, alpha=0.7, label=f'Mean: {mean_crashes:.1f}')
axes[0].legend(loc='upper right', fontsize=10)
# ANRs over time
axes[1].plot(daily_agg['Date'], daily_agg['Daily ANRs'],
marker='s', linewidth=2.5, markersize=5, color='#f39c12',
markerfacecolor='white', markeredgewidth=2)
axes[1].fill_between(daily_agg['Date'], daily_agg['Daily ANRs'],
alpha=0.3, color='#f39c12')
axes[1].set_title('Daily ANRs Over Time (2024-2025)',
fontsize=15, fontweight='bold', pad=15)
axes[1].set_xlabel('Date', fontsize=12, fontweight='bold')
axes[1].set_ylabel('Total Daily ANRs', fontsize=12, fontweight='bold')
axes[1].grid(True, alpha=0.4, linestyle='--')
axes[1].tick_params(axis='x', rotation=45, labelsize=10)
axes[1].tick_params(axis='y', labelsize=10)
# Add mean line
mean_anrs = daily_agg['Daily ANRs'].mean()
axes[1].axhline(y=mean_anrs, color='orange', linestyle='--',
linewidth=2, alpha=0.7, label=f'Mean: {mean_anrs:.1f}')
axes[1].legend(loc='upper right', fontsize=10)
plt.tight_layout()
plt.savefig('batch1_time_series_crashes_anrs.png', dpi=300, bbox_inches='tight')
plt.show()
print("✓ Chart saved: batch1_time_series_crashes_anrs.png")
✓ Chart saved: batch1_time_series_crashes_anrs.png
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
# Set style
sns.set_style("whitegrid")
plt.rcParams['figure.dpi'] = 300
print("="*70)
print("BATCH 2: CRASH RATE ANALYSIS BY OS VERSION & DEVICE")
print("="*70)
# Load the data
df = pd.read_csv(r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\Combined_App_Performance.csv")
df.columns = df.columns.str.strip()
# Find columns
date_col = [col for col in df.columns if 'date' in col.lower()][0]
device_col = [col for col in df.columns if 'device' in col.lower()][0] if any('device' in col.lower() for col in df.columns) else None
# Keep only required columns
columns_to_keep = ['Daily ANRs', 'Android OS Version', 'Daily Crashes', date_col]
if device_col:
columns_to_keep.append(device_col)
df = df[columns_to_keep].copy()
# Clean data
df[date_col] = pd.to_datetime(df[date_col], errors='coerce')
df = df.rename(columns={date_col: 'Date'})
df['Daily Crashes'] = pd.to_numeric(df['Daily Crashes'], errors='coerce')
df['Daily ANRs'] = pd.to_numeric(df['Daily ANRs'], errors='coerce')
df = df.dropna(subset=['Date', 'Daily Crashes', 'Daily ANRs'])
if device_col:
df = df.rename(columns={device_col: 'Device'})
print(f"Dataset loaded: {len(df):,} records")
# ===========================================================================
# 2. CRASH RATE BY ANDROID OS VERSION
# ===========================================================================
print("\n2. CRASH RATE BY ANDROID OS VERSION")
print("-" * 70)
os_metrics = df.groupby('Android OS Version').agg({
'Daily Crashes': ['sum', 'mean', 'count'],
'Daily ANRs': ['sum', 'mean']
}).reset_index()
os_metrics.columns = ['OS_Version', 'Total_Crashes', 'Avg_Crashes', 'Record_Count',
'Total_ANRs', 'Avg_ANRs']
# Calculate crash rate (crashes per record)
os_metrics['Crash_Rate'] = (os_metrics['Total_Crashes'] / os_metrics['Record_Count']) * 100
os_metrics['ANR_Rate'] = (os_metrics['Total_ANRs'] / os_metrics['Record_Count']) * 100
# Sort by crash rate
os_metrics = os_metrics.sort_values('Crash_Rate', ascending=False)
print("\nAndroid OS Version Performance Metrics:")
print(os_metrics.to_string(index=False))
# Visualization - OS Version Crash Rate
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
# Crash Rate
bars1 = axes[0].barh(range(len(os_metrics)), os_metrics['Crash_Rate'],
color='#3498db', alpha=0.8, edgecolor='black')
# Highlight top 3 worst
for i in range(min(3, len(bars1))):
bars1[i].set_color('#e74c3c')
bars1[i].set_alpha(1.0)
axes[0].set_yticks(range(len(os_metrics)))
axes[0].set_yticklabels(os_metrics['OS_Version'], fontsize=10)
axes[0].set_xlabel('Crash Rate (% of records)', fontsize=12, fontweight='bold')
axes[0].set_title('Crash Rate by Android OS Version', fontsize=13, fontweight='bold')
axes[0].grid(True, alpha=0.3, axis='x')
axes[0].invert_yaxis()
# Add value labels
for i, v in enumerate(os_metrics['Crash_Rate']):
axes[0].text(v + 0.5, i, f'{v:.1f}%', va='center', fontsize=9, fontweight='bold')
# ANR Rate
bars2 = axes[1].barh(range(len(os_metrics)), os_metrics['ANR_Rate'],
color='#9b59b6', alpha=0.8, edgecolor='black')
# Highlight top 3 worst
for i in range(min(3, len(bars2))):
bars2[i].set_color('#f39c12')
bars2[i].set_alpha(1.0)
axes[1].set_yticks(range(len(os_metrics)))
axes[1].set_yticklabels(os_metrics['OS_Version'], fontsize=10)
axes[1].set_xlabel('ANR Rate (% of records)', fontsize=12, fontweight='bold')
axes[1].set_title('ANR Rate by Android OS Version', fontsize=13, fontweight='bold')
axes[1].grid(True, alpha=0.3, axis='x')
axes[1].invert_yaxis()
# Add value labels
for i, v in enumerate(os_metrics['ANR_Rate']):
axes[1].text(v + 0.5, i, f'{v:.1f}%', va='center', fontsize=9, fontweight='bold')
plt.tight_layout()
plt.savefig('batch2_crash_rate_by_os.png', dpi=300, bbox_inches='tight')
print("\n✓ Chart saved: batch2_crash_rate_by_os.png")
plt.close()
# ===========================================================================
# 3. CRASH RATE BY DEVICE (if Device column exists)
# ===========================================================================
if 'Device' in df.columns:
print("\n\n3. CRASH RATE BY DEVICE")
print("-" * 70)
device_metrics = df.groupby('Device').agg({
'Daily Crashes': ['sum', 'mean', 'count'],
'Daily ANRs': ['sum', 'mean']
}).reset_index()
device_metrics.columns = ['Device', 'Total_Crashes', 'Avg_Crashes', 'Record_Count',
'Total_ANRs', 'Avg_ANRs']
device_metrics['Crash_Rate'] = (device_metrics['Total_Crashes'] / device_metrics['Record_Count']) * 100
device_metrics['ANR_Rate'] = (device_metrics['Total_ANRs'] / device_metrics['Record_Count']) * 100
device_metrics = device_metrics.sort_values('Crash_Rate', ascending=False)
print("\nDevice Performance Metrics:")
print(device_metrics.to_string(index=False))
# Visualization - Device Crash Rate (Top 15)
top_devices = device_metrics.head(15)
fig, axes = plt.subplots(1, 2, figsize=(16, 8))
# Crash Rate
bars1 = axes[0].barh(range(len(top_devices)), top_devices['Crash_Rate'],
color='#3498db', alpha=0.8, edgecolor='black')
for i in range(min(5, len(bars1))):
bars1[i].set_color('#e74c3c')
bars1[i].set_alpha(1.0)
axes[0].set_yticks(range(len(top_devices)))
axes[0].set_yticklabels(top_devices['Device'], fontsize=9)
axes[0].set_xlabel('Crash Rate (% of records)', fontsize=12, fontweight='bold')
axes[0].set_title('Top 15 Devices by Crash Rate', fontsize=13, fontweight='bold')
axes[0].grid(True, alpha=0.3, axis='x')
axes[0].invert_yaxis()
# ANR Rate
bars2 = axes[1].barh(range(len(top_devices)), top_devices['ANR_Rate'],
color='#9b59b6', alpha=0.8, edgecolor='black')
for i in range(min(5, len(bars2))):
bars2[i].set_color('#f39c12')
bars2[i].set_alpha(1.0)
axes[1].set_yticks(range(len(top_devices)))
axes[1].set_yticklabels(top_devices['Device'], fontsize=9)
axes[1].set_xlabel('ANR Rate (% of records)', fontsize=12, fontweight='bold')
axes[1].set_title('Top 15 Devices by ANR Rate', fontsize=13, fontweight='bold')
axes[1].grid(True, alpha=0.3, axis='x')
axes[1].invert_yaxis()
plt.tight_layout()
plt.savefig('batch2_crash_rate_by_device.png', dpi=300, bbox_inches='tight')
print("\n✓ Chart saved: batch2_crash_rate_by_device.png")
plt.close()
else:
print("\n\n3. DEVICE ANALYSIS SKIPPED")
print("-" * 70)
print("No Device column found in dataset.")
print("\n" + "="*70)
print("BATCH 2 COMPLETE - Crash Rate Analysis Done")
print("="*70)
======================================================================
BATCH 2: CRASH RATE ANALYSIS BY OS VERSION & DEVICE
======================================================================
Dataset loaded: 3,391 records
2. CRASH RATE BY ANDROID OS VERSION
----------------------------------------------------------------------
Android OS Version Performance Metrics:
OS_Version Total_Crashes Avg_Crashes Record_Count Total_ANRs Avg_ANRs Crash_Rate ANR_Rate
Android 14 1076.0 38.428571 28 4176.0 149.142857 3842.857143 14914.285714
Android 9 679.0 24.250000 28 182.0 6.500000 2425.000000 650.000000
Android 13 381.0 13.607143 28 325.0 11.607143 1360.714286 1160.714286
Android 12 290.0 10.357143 28 562.0 20.071429 1035.714286 2007.142857
Android 11 238.0 8.500000 28 375.0 13.392857 850.000000 1339.285714
Android 10 195.0 6.964286 28 330.0 11.785714 696.428571 1178.571429
Android 8.1 135.0 4.821429 28 243.0 8.678571 482.142857 867.857143
Android 7.1 93.0 4.043478 23 64.0 2.782609 404.347826 278.260870
Android 8.0 49.0 2.227273 22 21.0 0.954545 222.727273 95.454545
Android 15 44.0 1.571429 28 180.0 6.428571 157.142857 642.857143
Android 6.0 16.0 0.761905 21 48.0 2.285714 76.190476 228.571429
Android 7.0 6.0 0.352941 17 35.0 2.058824 35.294118 205.882353
Android 12L 0.0 0.000000 2 2.0 1.000000 0.000000 100.000000
✓ Chart saved: batch2_crash_rate_by_os.png
3. CRASH RATE BY DEVICE
----------------------------------------------------------------------
Device Performance Metrics:
Device Total_Crashes Avg_Crashes Record_Count Total_ANRs Avg_ANRs Crash_Rate ANR_Rate
samsung r5q (Galaxy S10 Lite) 46.0 46.000000 1 165.0 165.000000 4600.000000 16500.000000
KDDI SOV35 (Xperia XZs) 36.0 36.000000 1 144.0 144.000000 3600.000000 14400.000000
Redmi miel (Redmi Note 11S) 63.0 31.500000 2 185.0 92.500000 3150.000000 9250.000000
Blackview BV9200 (BV9200) 26.0 26.000000 1 8.0 8.000000 2600.000000 800.000000
Xiaomi plato (Xiaomi 12T) 22.0 22.000000 1 138.0 138.000000 2200.000000 13800.000000
ALLVIEW X8_Soul_Style (X8 Soul Style) 36.0 18.000000 2 138.0 69.000000 1800.000000 6900.000000
Infinix Infinix-X652A (S5) 17.0 17.000000 1 4.0 4.000000 1700.000000 400.000000
KDDI SOG02 (Xperia 5 II) 17.0 17.000000 1 18.0 18.000000 1700.000000 1800.000000
samsung gt58wifi (Galaxy Tab A 8.0) 17.0 17.000000 1 6.0 6.000000 1700.000000 600.000000
HONOR HNCLK-Q (HONOR X7b) 16.0 16.000000 1 1.0 1.000000 1600.000000 100.000000
motorola cancun (moto g14) 79.0 15.800000 5 154.0 30.800000 1580.000000 3080.000000
motorola albus (Moto Z (2) Play) 44.0 14.666667 3 16.0 5.333333 1466.666667 533.333333
POCO vayu (POCO X3 Pro) 66.0 13.200000 5 161.0 32.200000 1320.000000 3220.000000
TECNO TECNO-KG6P (TECNO SPARK 8T) 13.0 13.000000 1 8.0 8.000000 1300.000000 800.000000
POCO surya (POCO X3 NFC) 37.0 12.333333 3 7.0 2.333333 1233.333333 233.333333
realme RE5887 (realme C30) 35.0 11.666667 3 0.0 0.000000 1166.666667 0.000000
vivo 2111 (V2111) 46.0 11.500000 4 49.0 12.250000 1150.000000 1225.000000
samsung a30s (Galaxy A30s) 46.0 11.500000 4 148.0 37.000000 1150.000000 3700.000000
samsung a10 (Galaxy A10) 57.0 11.400000 5 157.0 31.400000 1140.000000 3140.000000
vivo 2027 (V2029) 55.0 11.000000 5 166.0 33.200000 1100.000000 3320.000000
OPPO OP5353L1 (A57s/A77) 33.0 11.000000 3 2.0 0.666667 1100.000000 66.666667
Infinix Infinix-X690 (NOTE 7) 11.0 11.000000 1 11.0 11.000000 1100.000000 1100.000000
realme RE5894 (realme C33) 43.0 10.750000 4 159.0 39.750000 1075.000000 3975.000000
BLU B130DL (B131DL) 86.0 10.750000 8 323.0 40.375000 1075.000000 4037.500000
Xiaomi oxygen (MI MAX 2) 10.0 10.000000 1 10.0 10.000000 1000.000000 1000.000000
Redmi eos (Redmi 10 2022) 19.0 9.500000 2 40.0 20.000000 950.000000 2000.000000
HONOR HWAUM-Q (Honor 7C) 38.0 9.500000 4 11.0 2.750000 950.000000 275.000000
realme RMX2020 (RMX2020) 28.0 9.333333 3 6.0 2.000000 933.333333 200.000000
HONOR HNCLK-M1 (HONOR 90 Smart) 9.0 9.000000 1 13.0 13.000000 900.000000 1300.000000
Itel itel-A551L (itel A27(P17 Pro 4G)) 9.0 9.000000 1 23.0 23.000000 900.000000 2300.000000
PRITOM L8_B02 (L8_B02) 9.0 9.000000 1 7.0 7.000000 900.000000 700.000000
TECNO TECNO-KG5j (TECNO SPARK 8C) 27.0 9.000000 3 3.0 1.000000 900.000000 100.000000
POCO duchamp (POCO X6 Pro 5G) 52.0 8.666667 6 18.0 3.000000 866.666667 300.000000
OPPO CPH1823 (F9) 43.0 8.600000 5 138.0 27.600000 860.000000 2760.000000
samsung b0s (Galaxy S22 Ultra) 17.0 8.500000 2 16.0 8.000000 850.000000 800.000000
realme RMX3261 (realme C21Y) 25.0 8.333333 3 5.0 1.666667 833.333333 166.666667
samsung a03 (Galaxy A03) 48.0 8.000000 6 12.0 2.000000 800.000000 200.000000
samsung on7xreflte (Galaxy On7 Prime) 31.0 7.750000 4 14.0 3.500000 775.000000 350.000000
vivo V2327 (Y200e 5G) 31.0 7.750000 4 41.0 10.250000 775.000000 1025.000000
samsung j4primelte (Galaxy J4+) 85.0 7.727273 11 163.0 14.818182 772.727273 1481.818182
Redmi fire (Redmi 12) 98.0 7.538462 13 202.0 15.538462 753.846154 1553.846154
TECNO TECNO-KD7 (TECNO SPARK 5 Pro) 15.0 7.500000 2 4.0 2.000000 750.000000 200.000000
vivo 1716 (vivo 1716) 45.0 7.500000 6 153.0 25.500000 750.000000 2550.000000
Redmi ruby (Redmi Note 12 Pro 5G) 37.0 7.400000 5 7.0 1.400000 740.000000 140.000000
vivo 1727ID (V9) 7.0 7.000000 1 0.0 0.000000 700.000000 0.000000
OPPO OP4BA5L1 (CPH2113) 14.0 7.000000 2 1.0 0.500000 700.000000 50.000000
motorola pettyl (moto e5 play) 7.0 7.000000 1 16.0 16.000000 700.000000 1600.000000
HUAWEI HWSTK-HF (HUAWEI Y9 Prime 2019) 7.0 7.000000 1 2.0 2.000000 700.000000 200.000000
google gm4g_s_sprout (4G) 7.0 7.000000 1 1.0 1.000000 700.000000 100.000000
Xiaomi pine (Redmi 7A) 14.0 7.000000 2 23.0 11.500000 700.000000 1150.000000
TECNO TECNO-BF6 (TECNO POP 7) 21.0 7.000000 3 12.0 4.000000 700.000000 400.000000
Redmi emerald (Redmi Note 13 Pro) 13.0 6.500000 2 39.0 19.500000 650.000000 1950.000000
VGO_TEL NEW_20 (NEW_20) 38.0 6.333333 6 142.0 23.666667 633.333333 2366.666667
HONOR HWLLD-H (Honor 9 Lite) 62.0 6.200000 10 165.0 16.500000 620.000000 1650.000000
samsung a15x (Galaxy A15 5G) 43.0 6.142857 7 153.0 21.857143 614.285714 2185.714286
Itel itel-A661L (A49) 6.0 6.000000 1 0.0 0.000000 600.000000 0.000000
ATT WTATTRW2 (WTATTRW2) 12.0 6.000000 2 14.0 7.000000 600.000000 700.000000
Redmi merlin (Redmi Note 9) 36.0 6.000000 6 18.0 3.000000 600.000000 300.000000
TECNO TECNO-KD6 (SPARK 5 Air) 6.0 6.000000 1 5.0 5.000000 600.000000 500.000000
motorola hawaiip (moto g22) 29.0 5.800000 5 11.0 2.200000 580.000000 220.000000
HONOR HNRKY-M1 (HONOR X7a ) 29.0 5.800000 5 126.0 25.200000 580.000000 2520.000000
Redmi topaz (Redmi Note 12) 61.0 5.545455 11 33.0 3.000000 554.545455 300.000000
Infinix Infinix-X6511B (SMART 6) 11.0 5.500000 2 0.0 0.000000 550.000000 0.000000
OP595BL1 11.0 5.500000 2 9.0 4.500000 550.000000 450.000000
Infinix-X6831 22.0 5.500000 4 149.0 37.250000 550.000000 3725.000000
motorola maltalsc (moto e(7i) power) 11.0 5.500000 2 13.0 6.500000 550.000000 650.000000
vivo 1902 (vivo 1902) 27.0 5.400000 5 9.0 1.800000 540.000000 180.000000
asus ASUS_A001D_1 (ZenFone Max Plus M2 (ZB634KL)) 16.0 5.333333 3 20.0 6.666667 533.333333 666.666667
OPPO CPH1723 (CPH1723) 16.0 5.333333 3 23.0 7.666667 533.333333 766.666667
samsung j2y18lte (Galaxy Grand Prime Pro) 26.0 5.200000 5 30.0 6.000000 520.000000 600.000000
samsung j8y18lte (Galaxy J8) 10.0 5.000000 2 5.0 2.500000 500.000000 250.000000
sunstone 25.0 5.000000 5 27.0 5.400000 500.000000 540.000000
TECNO-BD4j 5.0 5.000000 1 1.0 1.000000 500.000000 100.000000
UMIDIGI F1 (F1) 5.0 5.000000 1 15.0 15.000000 500.000000 1500.000000
samsung a30 (Galaxy A30) 10.0 5.000000 2 10.0 5.000000 500.000000 500.000000
motorola parker (motorola one zoom) 5.0 5.000000 1 0.0 0.000000 500.000000 0.000000
motorola athene_f (Moto G(4) Plus) 5.0 5.000000 1 8.0 8.000000 500.000000 800.000000
HUAWEI HWBAC (HUAWEI nova 2 Plus) 5.0 5.000000 1 0.0 0.000000 500.000000 0.000000
motorola malta (moto e(7)) 64.0 4.923077 13 138.0 10.615385 492.307692 1061.538462
motorola ocean_t (moto g(7) power) 29.0 4.833333 6 16.0 2.666667 483.333333 266.666667
motorola montana (Moto G (5S)) 24.0 4.800000 5 11.0 2.200000 480.000000 220.000000
POCO citrus (POCO M3) 19.0 4.750000 4 4.0 1.000000 475.000000 100.000000
motorola sabahl (moto e13) 14.0 4.666667 3 17.0 5.666667 466.666667 566.666667
motorola hanoip (moto g(60)) 28.0 4.666667 6 149.0 24.833333 466.666667 2483.333333
samsung a6lte (Galaxy A6) 14.0 4.666667 3 0.0 0.000000 466.666667 0.000000
lge meh15lm (LM-K420) 28.0 4.666667 6 26.0 4.333333 466.666667 433.333333
motorola java (moto g(20)) 23.0 4.600000 5 12.0 2.400000 460.000000 240.000000
HUAWEI HWINE (nova 3i) 23.0 4.600000 5 12.0 2.400000 460.000000 240.000000
Infinix Infinix-X657C (SMART 5) 23.0 4.600000 5 3.0 0.600000 460.000000 60.000000
motorola ali (moto g(6)) 32.0 4.571429 7 21.0 3.000000 457.142857 300.000000
OPPO OP4EFDL1 (A53) 32.0 4.571429 7 135.0 19.285714 457.142857 1928.571429
FACETEL Q3pro_eea (Q3pro_eea) 18.0 4.500000 4 8.0 2.000000 450.000000 200.000000
vivo 1904 (vivo 1904) 58.0 4.461538 13 20.0 1.538462 446.153846 153.846154
Redmi tapas (Redmi Note 12) 58.0 4.142857 14 61.0 4.357143 414.285714 435.714286
Blackview Active8Pro (Active 8 Pro) 4.0 4.000000 1 1.0 1.000000 400.000000 100.000000
realme RE58B8L1 (RMX3771) 4.0 4.000000 1 0.0 0.000000 400.000000 0.000000
motorola caprip (moto g(30)) 12.0 4.000000 3 15.0 5.000000 400.000000 500.000000
realme RE50C1 (realme 7i) 4.0 4.000000 1 0.0 0.000000 400.000000 0.000000
TCL Passat (TCL 30+) 4.0 4.000000 1 0.0 0.000000 400.000000 0.000000
vivo V2205 (Y35) 12.0 4.000000 3 13.0 4.333333 400.000000 433.333333
Infinix Infinix-X662B (HOT 11) 4.0 4.000000 1 0.0 0.000000 400.000000 0.000000
HUAWEI HWMHA (Mate 9) 4.0 4.000000 1 1.0 1.000000 400.000000 100.000000
Redmi xun (Redmi Pad SE) 4.0 4.000000 1 2.0 2.000000 400.000000 200.000000
Reeder S19_Max_64GB (S19 Max 64GB) 4.0 4.000000 1 0.0 0.000000 400.000000 0.000000
vivo 1901 (vivo 1901) 8.0 4.000000 2 4.0 2.000000 400.000000 200.000000
OPPO OP4C77L1 (A52) 4.0 4.000000 1 1.0 1.000000 400.000000 100.000000
Redmi curtana (Redmi Note 9 Pro) 20.0 4.000000 5 159.0 31.800000 400.000000 3180.000000
HONOR HNLLY-Q (HONOR X8b) 4.0 4.000000 1 0.0 0.000000 400.000000 0.000000
TCL U5A_PLUS_4G (Alcatel 1X) 4.0 4.000000 1 0.0 0.000000 400.000000 0.000000
SMART_W84_EEA 12.0 4.000000 3 23.0 7.666667 400.000000 766.666667
HUAWEI HWAGS-Q (HUAWEI MediaPad T3 10) 4.0 4.000000 1 0.0 0.000000 400.000000 0.000000
V2251 4.0 4.000000 1 20.0 20.000000 400.000000 2000.000000
TECNO TECNO-KJ5n (TECNO SPARK 20) 4.0 4.000000 1 0.0 0.000000 400.000000 0.000000
TECNO TECNO-KC3 (TECNO CAMON 12 Air) 8.0 4.000000 2 11.0 5.500000 400.000000 550.000000
samsung j2corelte (Galaxy J2 Core) 40.0 4.000000 10 33.0 3.300000 400.000000 330.000000
Infinix Infinix-X6512 (SMART 6 HD) 4.0 4.000000 1 0.0 0.000000 400.000000 0.000000
Multilaser Multilaser_E_2 (Multilaser E 2) 4.0 4.000000 1 1.0 1.000000 400.000000 100.000000
vivo V2333 (Y03) 51.0 3.923077 13 171.0 13.153846 392.307692 1315.384615
Infinix Infinix-X6731B (Infinix ZERO 30) 18.0 3.600000 5 19.0 3.800000 360.000000 380.000000
motorola bali (moto e6 play) 43.0 3.583333 12 53.0 4.416667 358.333333 441.666667
Redmi earth (Redmi 12C) 32.0 3.555556 9 38.0 4.222222 355.555556 422.222222
Infinix Infinix-X6815C (ZERO 5G 2023) 7.0 3.500000 2 10.0 5.000000 350.000000 500.000000
motorola payton (Moto X (4)) 7.0 3.500000 2 2.0 1.000000 350.000000 100.000000
samsung a01core (Galaxy A01 Core) 14.0 3.500000 4 2.0 0.500000 350.000000 50.000000
moonstone 7.0 3.500000 2 0.0 0.000000 350.000000 0.000000
Infinix Infinix-X688B (HOT Play) 35.0 3.500000 10 13.0 1.300000 350.000000 130.000000
Infinix Infinix-X6710 (NOTE 30 VIP ) 7.0 3.500000 2 1.0 0.500000 350.000000 50.000000
xiaomi lavender (Redmi Note 7) 14.0 3.500000 4 2.0 0.500000 350.000000 50.000000
samsung a14x (Galaxy A14 5G) 48.0 3.428571 14 181.0 12.928571 342.857143 1292.857143
motorola borag (moto e22) 30.0 3.333333 9 22.0 2.444444 333.333333 244.444444
vivo PD1901 (V1901A) 23.0 3.285714 7 4.0 0.571429 328.571429 57.142857
OPPO CPH1819 (F7) 13.0 3.250000 4 4.0 1.000000 325.000000 100.000000
OPPO OP4F25L1 (CPH2159) 13.0 3.250000 4 1.0 0.250000 325.000000 25.000000
samsung a05m (Galaxy A05) 48.0 3.200000 15 26.0 1.733333 320.000000 173.333333
samsung a14 (Galaxy A14) 48.0 3.200000 15 36.0 2.400000 320.000000 240.000000
HONOR HNRMO-Q (HONOR X9a 5G) 16.0 3.200000 5 2.0 0.400000 320.000000 40.000000
motorola penang (moto g53 5G) 16.0 3.200000 5 1.0 0.200000 320.000000 20.000000
motorola penangf (moto g13) 19.0 3.166667 6 35.0 5.833333 316.666667 583.333333
samsung j4corelte (Galaxy J4 Core) 37.0 3.083333 12 37.0 3.083333 308.333333 308.333333
realme RMX2001L1 (realme 6) 3.0 3.000000 1 0.0 0.000000 300.000000 0.000000
samsung dreamlte (Galaxy S8) 9.0 3.000000 3 0.0 0.000000 300.000000 0.000000
motorola guamp (moto g(9) play) 6.0 3.000000 2 0.0 0.000000 300.000000 0.000000
realme RMX2151L1 (realme 7) 3.0 3.000000 1 2.0 2.000000 300.000000 200.000000
motorola troika_sprout (motorola one action) 3.0 3.000000 1 1.0 1.000000 300.000000 100.000000
lge mlv5 (LG K10 (2017)) 3.0 3.000000 1 0.0 0.000000 300.000000 0.000000
motorola astro (motorola one fusion) 3.0 3.000000 1 0.0 0.000000 300.000000 0.000000
samsung m20lte (Galaxy M20) 3.0 3.000000 1 0.0 0.000000 300.000000 0.000000
Teclast T40Air_ROW (T40Air_ROW) 3.0 3.000000 1 0.0 0.000000 300.000000 0.000000
Itel itel-L5002 (A25) 3.0 3.000000 1 0.0 0.000000 300.000000 0.000000
Infinix Infinix-X626B-LTE (S4) 3.0 3.000000 1 12.0 12.000000 300.000000 1200.000000
Xiaomi nabu (Xiaomi Pad 5) 6.0 3.000000 2 1.0 0.500000 300.000000 50.000000
TECNO TECNO-BC1s (POP 4 LTE) 3.0 3.000000 1 0.0 0.000000 300.000000 0.000000
OnePlus OnePlus9Pro (OnePlus 9 Pro 5G) 3.0 3.000000 1 1.0 1.000000 300.000000 100.000000
OnePlus OP5958L1 (OnePlus Nord CE 3 Lite 5G) 6.0 3.000000 2 10.0 5.000000 300.000000 500.000000
OPPO OP4F4DL1 (CPH2213) 3.0 3.000000 1 0.0 0.000000 300.000000 0.000000
vivo 1819 (vivo 1819) 3.0 3.000000 1 0.0 0.000000 300.000000 0.000000
OPPO OP4B83L1 (CPH1907) 3.0 3.000000 1 0.0 0.000000 300.000000 0.000000
Casper VIA_M30 (VIA M30) 3.0 3.000000 1 0.0 0.000000 300.000000 0.000000
samsung beyond2q (Galaxy S10+) 3.0 3.000000 1 0.0 0.000000 300.000000 0.000000
Infinix Infinix-X669D (Infinix HOT 30i) 12.0 3.000000 4 37.0 9.250000 300.000000 925.000000
samsung m13 (Galaxy F13) 23.0 2.875000 8 29.0 3.625000 287.500000 362.500000
TECNO TECNO-BG6 (TECNO) 40.0 2.857143 14 21.0 1.500000 285.714286 150.000000
Infinix Infinix-X650C (HOT 8) 17.0 2.833333 6 24.0 4.000000 283.333333 400.000000
motorola fiji (moto e6s) 28.0 2.800000 10 21.0 2.100000 280.000000 210.000000
Redmi heat (Redmi 12) 14.0 2.800000 5 30.0 6.000000 280.000000 600.000000
motorola pokerp (moto e? plus) 11.0 2.750000 4 15.0 3.750000 275.000000 375.000000
Infinix Infinix-X6833B (Infinix NOTE 30) 44.0 2.750000 16 53.0 3.312500 275.000000 331.250000
samsung on7xelte (Galaxy J7 Prime) 22.0 2.750000 8 16.0 2.000000 275.000000 200.000000
lge mdh15lm (LG K41S) 30.0 2.727273 11 21.0 1.909091 272.727273 190.909091
vivo 1814 (vivo 1814) 19.0 2.714286 7 11.0 1.571429 271.428571 157.142857
motorola cancunf (moto g54 5G) 35.0 2.692308 13 28.0 2.153846 269.230769 215.384615
OPPO OP5AF2L1 (Reno12 F/FS 5G) 8.0 2.666667 3 2.0 0.666667 266.666667 66.666667
samsung a13 (Galaxy A13) 44.0 2.588235 17 33.0 1.941176 258.823529 194.117647
Infinix Infinix-X6525 (Infinix SMART 8) 36.0 2.571429 14 23.0 1.642857 257.142857 164.285714
Redmi gust (Redmi 13C) 36.0 2.571429 14 40.0 2.857143 257.142857 285.714286
samsung gta3xl (Galaxy Tab A) 5.0 2.500000 2 3.0 1.500000 250.000000 150.000000
Nokia MNT (Nokia C10) 5.0 2.500000 2 1.0 0.500000 250.000000 50.000000
maui 5.0 2.500000 2 0.0 0.000000 250.000000 0.000000
Infinix Infinix-X6823 (SMART 6 PLUS) 5.0 2.500000 2 1.0 0.500000 250.000000 50.000000
lge dh0lm (K22) 5.0 2.500000 2 1.0 0.500000 250.000000 50.000000
Redmi sea (Redmi Note 12S) 10.0 2.500000 4 16.0 4.000000 250.000000 400.000000
Redmi iron (Redmi Note 13 5G) 5.0 2.500000 2 2.0 1.000000 250.000000 100.000000
realme RED8D1 (realme C21-Y) 10.0 2.500000 4 2.0 0.500000 250.000000 50.000000
samsung gteslteatt (Galaxy Tab E 8.0) 5.0 2.500000 2 1.0 0.500000 250.000000 50.000000
lge mmh4p (LM-X525) 10.0 2.500000 4 2.0 0.500000 250.000000 50.000000
Infinix Infinix-X663 (NOTE) 5.0 2.500000 2 1.0 0.500000 250.000000 50.000000
vivo 2026 (V2043) 27.0 2.454545 11 20.0 1.818182 245.454545 181.818182
samsung m14x (Galaxy M14 5G) 12.0 2.400000 5 12.0 2.400000 240.000000 240.000000
Infinix Infinix-X6812B (Infinix HOT 11S NFC) 12.0 2.400000 5 12.0 2.400000 240.000000 240.000000
OPPO OP574FL1 (A58) 12.0 2.400000 5 2.0 0.400000 240.000000 40.000000
OPPO OP56E8L1 (A98 5G) 12.0 2.400000 5 3.0 0.600000 240.000000 60.000000
motorola lion (moto g04) 26.0 2.363636 11 20.0 1.818182 236.363636 181.818182
Lenovo X306F (Lenovo Tab M10 HD (2nd Gen)) 7.0 2.333333 3 2.0 0.666667 233.333333 66.666667
realme RE58AB (realme C30s) 14.0 2.333333 6 3.0 0.500000 233.333333 50.000000
Infinix Infinix-X680 (Infinix HOT 9 Play) 7.0 2.333333 3 3.0 1.000000 233.333333 100.000000
vivo 1727 (vivo 1727) 7.0 2.333333 3 1.0 0.333333 233.333333 33.333333
Redmi sunny (Redmi Note 10) 9.0 2.250000 4 11.0 2.750000 225.000000 275.000000
samsung a14xm (Galaxy A14 5G) 9.0 2.250000 4 2.0 0.500000 225.000000 50.000000
samsung a04e (Galaxy A04e) 31.0 2.214286 14 27.0 1.928571 221.428571 192.857143
samsung a01q (Galaxy A01) 33.0 2.200000 15 9.0 0.600000 220.000000 60.000000
samsung a02 (Galaxy A02) 22.0 2.200000 10 10.0 1.000000 220.000000 100.000000
motorola fogorow (moto g24) 11.0 2.200000 5 2.0 0.400000 220.000000 40.000000
Vertex Pro_P300_4G (Pro_P300_4G) 13.0 2.166667 6 27.0 4.500000 216.666667 450.000000
Redmi camellian (Redmi Note 10 5G) 13.0 2.166667 6 5.0 0.833333 216.666667 83.333333
motorola aljeter (moto g(6) play) 13.0 2.166667 6 6.0 1.000000 216.666667 100.000000
samsung a23 (Galaxy A23) 23.0 2.090909 11 24.0 2.181818 209.090909 218.181818
Redmi sweet (Redmi Note 12 Pro) 27.0 2.076923 13 35.0 2.692308 207.692308 269.230769
samsung a02q (Galaxy A02s) 8.0 2.000000 4 7.0 1.750000 200.000000 175.000000
Itel itel-A551L-Pro (itel A27(P17 Pro 4G)) 6.0 2.000000 3 3.0 1.000000 200.000000 100.000000
Infinix Infinix-X6836 (Infinix HOT 40) 6.0 2.000000 3 1.0 0.333333 200.000000 33.333333
motorola channel (moto g(7) play) 14.0 2.000000 7 3.0 0.428571 200.000000 42.857143
motorola cypfq (moto g51 5G) 2.0 2.000000 1 1.0 1.000000 200.000000 100.000000
motorola harpia_n (Moto G (4) Play) 6.0 2.000000 3 2.0 0.666667 200.000000 66.666667
OP530DL1 4.0 2.000000 2 0.0 0.000000 200.000000 0.000000
motorola lima (moto g(8) play) 6.0 2.000000 3 10.0 3.333333 200.000000 333.333333
motorola borago (moto e22i) 2.0 2.000000 1 0.0 0.000000 200.000000 0.000000
samsung greatlte (Galaxy Note8) 6.0 2.000000 3 1.0 0.333333 200.000000 33.333333
Infinix Infinix-X6526 (SMART 8 Plus) 4.0 2.000000 2 3.0 1.500000 200.000000 150.000000
Nokia NE1 (Nokia 3) 2.0 2.000000 1 4.0 4.000000 200.000000 400.000000
OPPO CPH1803 (A3s) 22.0 2.000000 11 24.0 2.181818 200.000000 218.181818
realme RMX2021 (realme C3) 2.0 2.000000 1 0.0 0.000000 200.000000 0.000000
samsung gta8wifi (Galaxy Tab A8) 10.0 2.000000 5 1.0 0.200000 200.000000 20.000000
realme RE879EL1 (realme 9 Pro+) 2.0 2.000000 1 1.0 1.000000 200.000000 100.000000
realme RED8CDL1 (narzo 50 5G) 2.0 2.000000 1 0.0 0.000000 200.000000 0.000000
HONOR HWBKK-Q (honor 8C) 6.0 2.000000 3 0.0 0.000000 200.000000 0.000000
Cat S42G (Cat S42) 2.0 2.000000 1 0.0 0.000000 200.000000 0.000000
Casper VIA_G3 (VIA_ G3) 4.0 2.000000 2 2.0 1.000000 200.000000 100.000000
Casper VIA_E30 (VIA_E3_0) 2.0 2.000000 1 1.0 1.000000 200.000000 100.000000
xiaomi daisy_sprout (Mi A2 Lite) 4.0 2.000000 2 2.0 1.000000 200.000000 100.000000
B1550VL 4.0 2.000000 2 8.0 4.000000 200.000000 400.000000
samsung a52q (Galaxy A52) 2.0 2.000000 1 0.0 0.000000 200.000000 0.000000
samsung a24 (Galaxy A24) 12.0 2.000000 6 2.0 0.333333 200.000000 33.333333
realme RMX3195 (realme C25s) 8.0 2.000000 4 9.0 2.250000 200.000000 225.000000
vivo V2217 (Y02) 2.0 2.000000 1 0.0 0.000000 200.000000 0.000000
samsung m12 (Galaxy M12) 6.0 2.000000 3 15.0 5.000000 200.000000 500.000000
lge mdh35lm (K51S) 6.0 2.000000 3 1.0 0.333333 200.000000 33.333333
iQOO I2126 (iQOO Z6 Pro) 2.0 2.000000 1 0.0 0.000000 200.000000 0.000000
lge L-01K (V30+) 2.0 2.000000 1 0.0 0.000000 200.000000 0.000000
docomo SO-01K (Xperia XZ1) 2.0 2.000000 1 3.0 3.000000 200.000000 300.000000
google relm_cheets (Intel Braswell Chromebook) 4.0 2.000000 2 14.0 7.000000 200.000000 700.000000
TECNO TECNO-KF6n (TECNO SPARK 7) 2.0 2.000000 1 0.0 0.000000 200.000000 0.000000
TECNO TECNO-BF7n (TECNO) 2.0 2.000000 1 0.0 0.000000 200.000000 0.000000
Xiaomi courbet (Mi 11 Lite) 2.0 2.000000 1 0.0 0.000000 200.000000 0.000000
TECNO TECNO-LH7n (POVA 5) 12.0 2.000000 6 9.0 1.500000 200.000000 150.000000
WIKO W-V770 (T3) 2.0 2.000000 1 0.0 0.000000 200.000000 0.000000
samsung on7xltechn (Galaxy J7 Prime) 2.0 2.000000 1 1.0 1.000000 200.000000 100.000000
VGOTEL NEW_24 (NEW 24) 2.0 2.000000 1 2.0 2.000000 200.000000 200.000000
OPPO OP5709L1 (OPPO Reno8 T) 2.0 2.000000 1 0.0 0.000000 200.000000 0.000000
OPPO OP5AD3L1 (Reno 11 Pro) 4.0 2.000000 2 0.0 0.000000 200.000000 0.000000
lge cv109 (LG K9) 2.0 2.000000 1 0.0 0.000000 200.000000 0.000000
OPPO OP533FL1 (OPPO Reno7/F21 Pro) 4.0 2.000000 2 2.0 1.000000 200.000000 100.000000
OPPO OP532FL1 (OPPO Reno7 Z 5G) 2.0 2.000000 1 0.0 0.000000 200.000000 0.000000
Xiaomi pipa (Xiaomi Pad 6) 2.0 2.000000 1 0.0 0.000000 200.000000 0.000000
Xiaomi rosy (Redmi 5) 4.0 2.000000 2 0.0 0.000000 200.000000 0.000000
Xiaomi santoni (Redmi 4X) 14.0 2.000000 7 2.0 0.285714 200.000000 28.571429
Multilaser Multilaser_E_Lite (Multilaser E Lite) 2.0 2.000000 1 0.0 0.000000 200.000000 0.000000
Itel itel-S681LN (itel S23+) 2.0 2.000000 1 2.0 2.000000 200.000000 200.000000
Itel itel-W5006X (A17) 4.0 2.000000 2 0.0 0.000000 200.000000 0.000000
vivo 1723CF (V9 6GB) 6.0 2.000000 3 14.0 4.666667 200.000000 466.666667
Itel itel-A571W (A37) 2.0 2.000000 1 1.0 1.000000 200.000000 100.000000
vivo 2015 (vivo 2015) 12.0 2.000000 6 23.0 3.833333 200.000000 383.333333
vivo 1951 (Z1 Pro) 4.0 2.000000 2 1.0 0.500000 200.000000 50.000000
OnePlus OP5552L1 (OnePlus 10T 5G) 2.0 2.000000 1 0.0 0.000000 200.000000 0.000000
vivo 1812 (vivo 1812) 8.0 2.000000 4 1.0 0.250000 200.000000 25.000000
Nokia COS (Nokia C20) 2.0 2.000000 1 1.0 1.000000 200.000000 100.000000
POCO stone (POCO M5) 12.0 2.000000 6 3.0 0.500000 200.000000 50.000000
Redmi lancelot (Redmi 9 Prime) 8.0 2.000000 4 6.0 1.500000 200.000000 150.000000
Redmi ice (Redmi A1) 2.0 2.000000 1 0.0 0.000000 200.000000 0.000000
Realme RMX1821 (realme 3) 2.0 2.000000 1 0.0 0.000000 200.000000 0.000000
Infinix Infinix-X6817 (Infinix HOT 12) 4.0 2.000000 2 0.0 0.000000 200.000000 0.000000
vivo V2247 (Y36) 14.0 2.000000 7 11.0 1.571429 200.000000 157.142857
vivo V2317 (V30 Lite) 2.0 2.000000 1 0.0 0.000000 200.000000 0.000000
OPPO OP4F2F (CPH2185) 23.0 1.916667 12 6.0 0.500000 191.666667 50.000000
vivo 1906 (vivo 1906 ) 26.0 1.857143 14 44.0 3.142857 185.714286 314.285714
samsung a3core (Galaxy A03 Core) 11.0 1.833333 6 10.0 1.666667 183.333333 166.666667
Infinix Infinix-X665E (HOT 20i) 11.0 1.833333 6 2.0 0.333333 183.333333 33.333333
samsung a12 (Galaxy A12) 20.0 1.818182 11 9.0 0.818182 181.818182 81.818182
HONOR HNVNE-M (HONOR X6) 9.0 1.800000 5 1.0 0.200000 180.000000 20.000000
samsung gta7lite (Galaxy Tab A7 Lite) 9.0 1.800000 5 2.0 0.400000 180.000000 40.000000
motorola river (moto g(7)) 9.0 1.800000 5 11.0 2.200000 180.000000 220.000000
samsung a33x (Galaxy A33 5G) 18.0 1.800000 10 5.0 0.500000 180.000000 50.000000
realme REE2ADL1 (RMX3710) 16.0 1.777778 9 25.0 2.777778 177.777778 277.777778
realme RE5C9F (RMX3834) 16.0 1.777778 9 6.0 0.666667 177.777778 66.666667
Redmi fleur (Redmi Note 11S) 7.0 1.750000 4 5.0 1.250000 175.000000 125.000000
HUAWEI HWJAT-M (HUAWEI Y6s) 21.0 1.750000 12 29.0 2.416667 175.000000 241.666667
Redmi ocean (Redmi Note 12S) 7.0 1.750000 4 30.0 7.500000 175.000000 750.000000
motorola dubai (motorola edge 30) 7.0 1.750000 4 4.0 1.000000 175.000000 100.000000
OPPO OP575DL1 (A18) 7.0 1.750000 4 0.0 0.000000 175.000000 0.000000
samsung on5xelte (Galaxy J5 Prime) 21.0 1.750000 12 34.0 2.833333 175.000000 283.333333
vivo 1938 (vivo 1938) 7.0 1.750000 4 6.0 1.500000 175.000000 150.000000
Nokia WVR_sprout (Nokia 2.4) 7.0 1.750000 4 9.0 2.250000 175.000000 225.000000
vivo 1811 (vivo 1811) 19.0 1.727273 11 6.0 0.545455 172.727273 54.545455
samsung a03s (Galaxy A03s) 22.0 1.692308 13 21.0 1.615385 169.230769 161.538462
motorola doha (moto g(8) plus) 15.0 1.666667 9 44.0 4.888889 166.666667 488.888889
Redmi joyeuse (Redmi Note 9 Pro) 5.0 1.666667 3 0.0 0.000000 166.666667 0.000000
OPPO OP4F97 (CPH2269) 15.0 1.666667 9 12.0 1.333333 166.666667 133.333333
ZTE P963F50 (ZTE Blade A5 2020) 5.0 1.666667 3 2.0 0.666667 166.666667 66.666667
motorola coful (moto g31) 10.0 1.666667 6 3.0 0.500000 166.666667 50.000000
OPPO OP4C72L1 (A92) 5.0 1.666667 3 2.0 0.666667 166.666667 66.666667
samsung j5y17lte (Galaxy J5) 10.0 1.666667 6 10.0 1.666667 166.666667 166.666667
samsung a10s (Galaxy A10s) 18.0 1.636364 11 14.0 1.272727 163.636364 127.272727
lge mmh4 (LM-X420) 18.0 1.636364 11 14.0 1.272727 163.636364 127.272727
Realme RMX1941 (realme C2) 18.0 1.636364 11 12.0 1.090909 163.636364 109.090909
vivo V2310 (Y17s) 21.0 1.615385 13 25.0 1.923077 161.538462 192.307692
Infinix Infinix-X670 (NOTE 12) 8.0 1.600000 5 1.0 0.200000 160.000000 20.000000
Infinix Infinix-X6515 (Infinix SMART 7) 22.0 1.571429 14 26.0 1.857143 157.142857 185.714286
HUAWEI HWDUB-Q (Y7 Prime 2019) 23.0 1.533333 15 20.0 1.333333 153.333333 133.333333
HONOR HWJSN-H (Honor 8X) 6.0 1.500000 4 9.0 2.250000 150.000000 225.000000
samsung a7y18lte (Galaxy A7 (2018)) 9.0 1.500000 6 13.0 2.166667 150.000000 216.666667
TECNO TECNO-LG6n (POVA Neo 2) 3.0 1.500000 2 1.0 0.500000 150.000000 50.000000
Redmi blue (Redmi A3) 3.0 1.500000 2 1.0 0.500000 150.000000 50.000000
samsung a20s (Galaxy A20s) 3.0 1.500000 2 1.0 0.500000 150.000000 50.000000
OPPO OP5303 (CPH2325) 3.0 1.500000 2 1.0 0.500000 150.000000 50.000000
Redmi spesn (Redmi Note 11) 6.0 1.500000 4 10.0 2.500000 150.000000 250.000000
OPPO OP4B80L1 (A9 2020) 3.0 1.500000 2 17.0 8.500000 150.000000 850.000000
realme RE50BF (realme C17) 3.0 1.500000 2 3.0 1.500000 150.000000 150.000000
OPPO CPH1801 (CPH1801) 3.0 1.500000 2 11.0 5.500000 150.000000 550.000000
Itel itel-S666LN (itel RS4) 3.0 1.500000 2 1.0 0.500000 150.000000 50.000000
GM G301 (GM 20) 3.0 1.500000 2 0.0 0.000000 150.000000 0.000000
TECNO TECNO-KE5j (SPARK 6 GO ) 3.0 1.500000 2 0.0 0.000000 150.000000 0.000000
HUAWEI HWLDN-Q (HUAWEI Y7 Prime 2018) 9.0 1.500000 6 11.0 1.833333 150.000000 183.333333
motorola bangkk (moto g84 5G) 3.0 1.500000 2 1.0 0.500000 150.000000 50.000000
realme RMX1911 (realme 5) 3.0 1.500000 2 0.0 0.000000 150.000000 0.000000
DOOGEE RK2 (U9) 3.0 1.500000 2 8.0 4.000000 150.000000 400.000000
OPPO OP5355 (A57) 15.0 1.500000 10 4.0 0.400000 150.000000 40.000000
TCL Bangkok_TF (TCL A3) 3.0 1.500000 2 6.0 3.000000 150.000000 300.000000
OPPO OP5A0BL1 (F25 Pro 5G) 6.0 1.500000 4 6.0 1.500000 150.000000 150.000000
TECNO TECNO-BF7 (TECNO) 21.0 1.500000 14 19.0 1.357143 150.000000 135.714286
realme RMX2030 (realme 5i) 10.0 1.428571 7 4.0 0.571429 142.857143 57.142857
realme RMX2185 (realme C11) 20.0 1.428571 14 25.0 1.785714 142.857143 178.571429
Redmi angelican (Redmi 9C NFC) 7.0 1.400000 5 3.0 0.600000 140.000000 60.000000
Xiaomi amber (Xiaomi 11T) 7.0 1.400000 5 4.0 0.800000 140.000000 80.000000
Redmi fog (Redmi 10C) 11.0 1.375000 8 21.0 2.625000 137.500000 262.500000
xiaomi ginkgo (Redmi Note 8) 11.0 1.375000 8 23.0 2.875000 137.500000 287.500000
Redmi gale (Redmi 13C) 19.0 1.357143 14 24.0 1.714286 135.714286 171.428571
HONOR HNTFY-Q (HONOR X8) 4.0 1.333333 3 2.0 0.666667 133.333333 66.666667
Alldocube T1023 (X game) 4.0 1.333333 3 1.0 0.333333 133.333333 33.333333
motorola deen_sprout (Motorola one) 4.0 1.333333 3 2.0 0.666667 133.333333 66.666667
samsung hero2lte (Galaxy S7 edge) 4.0 1.333333 3 4.0 1.333333 133.333333 133.333333
Redmi lime (Redmi 9 Power) 4.0 1.333333 3 0.0 0.000000 133.333333 0.000000
HONOR HNALI-Q (HONOR X9b 5G) 4.0 1.333333 3 2.0 0.666667 133.333333 66.666667
realme RE5C82L1 (realme 12 Pro+ 5G) 4.0 1.333333 3 2.0 0.666667 133.333333 66.666667
HUAWEI HWRNE (Mate 10 lite) 4.0 1.333333 3 4.0 1.333333 133.333333 133.333333
Infinix Infinix-X6528 (Infinix HOT 40i ) 4.0 1.333333 3 2.0 0.666667 133.333333 66.666667
HUAWEI HWAMN-M (HUAWEI Y5 2019) 8.0 1.333333 6 6.0 1.000000 133.333333 100.000000
OPPO OP56DBL1 (Reno10 Pro 5G) 4.0 1.333333 3 4.0 1.333333 133.333333 133.333333
realme RED8F6 (realme C25Y) 4.0 1.333333 3 2.0 0.666667 133.333333 66.666667
samsung a21s (Galaxy A21s) 8.0 1.333333 6 14.0 2.333333 133.333333 233.333333
samsung a04s (Galaxy A04s) 12.0 1.333333 9 15.0 1.666667 133.333333 166.666667
samsung a51 (Galaxy A51) 12.0 1.333333 9 3.0 0.333333 133.333333 33.333333
samsung a50 (Galaxy A50) 4.0 1.333333 3 4.0 1.333333 133.333333 133.333333
OPPO OP5AE7L1 (A60) 17.0 1.307692 13 9.0 0.692308 130.769231 69.230769
vivo 1807 (vivo 1807) 9.0 1.285714 7 12.0 1.714286 128.571429 171.428571
samsung a54x (Galaxy A54 5G) 14.0 1.272727 11 10.0 0.909091 127.272727 90.909091
OPPO OP4C7D (CPH2015) 14.0 1.272727 11 11.0 1.000000 127.272727 100.000000
SG SG706SH (AQUOS R2) 5.0 1.250000 4 2.0 0.500000 125.000000 50.000000
xiaomi ysl (Redmi S2) 5.0 1.250000 4 10.0 2.500000 125.000000 250.000000
xiaomi willow (Redmi Note 8T) 5.0 1.250000 4 8.0 2.000000 125.000000 200.000000
realme RE58BC (realme C51) 10.0 1.250000 8 13.0 1.625000 125.000000 162.500000
Redmi merlinnfc (Redmi Note 9) 5.0 1.250000 4 2.0 0.500000 125.000000 50.000000
Redmi garnet (Redmi Note 13 Pro 5G) 5.0 1.250000 4 2.0 0.500000 125.000000 50.000000
realme RE5C91L1 (realme C67) 5.0 1.250000 4 3.0 0.750000 125.000000 75.000000
OPPO OP571F (CPH2387) 5.0 1.250000 4 7.0 1.750000 125.000000 175.000000
Infinix Infinix-X669C (Infinix HOT 30i) 5.0 1.250000 4 6.0 1.500000 125.000000 150.000000
motorola evert_nt (moto g(6) plus) 6.0 1.200000 5 8.0 1.600000 120.000000 160.000000
reeder M7S (M7S) 6.0 1.200000 5 2.0 0.400000 120.000000 40.000000
samsung a05s (Galaxy A05s) 6.0 1.200000 5 6.0 1.200000 120.000000 120.000000
vivo 1808 (vivo 1808) 6.0 1.200000 5 1.0 0.200000 120.000000 20.000000
Redmi aether (Redmi 12C) 6.0 1.200000 5 0.0 0.000000 120.000000 0.000000
HONOR HNWDY-M (HONOR X6a) 6.0 1.200000 5 4.0 0.800000 120.000000 80.000000
HONOR HNCRT-M2 (HONOR 90 Lite) 6.0 1.200000 5 2.0 0.400000 120.000000 40.000000
realme RE54B4L1 (realme 8i) 7.0 1.166667 6 7.0 1.166667 116.666667 116.666667
vivo 1726 (vivo 1726) 7.0 1.166667 6 3.0 0.500000 116.666667 50.000000
OPPO OP4BFB (CPH2083) 15.0 1.153846 13 14.0 1.076923 115.384615 107.692308
vivo 2120 (V2120) 8.0 1.142857 7 3.0 0.428571 114.285714 42.857143
TECNO TECNO-BG7n (TECNO SPARK 20C) 11.0 1.100000 10 4.0 0.400000 110.000000 40.000000
samsung a15 (Galaxy A15) 11.0 1.100000 10 7.0 0.700000 110.000000 70.000000
samsung a12s (Galaxy A12) 11.0 1.100000 10 26.0 2.600000 110.000000 260.000000
HUAWEI HWANE (P20 lite) 13.0 1.083333 12 15.0 1.250000 108.333333 125.000000
Redmi spes (Redmi Note 11) 14.0 1.076923 13 5.0 0.384615 107.692308 38.461538
vivo 1820 (vivo 1820) 15.0 1.071429 14 7.0 0.500000 107.142857 50.000000
samsung a55x (Galaxy A55 5G) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
SPC GRAVITY_4 (GRAVITY_4) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
Redmi yunluo (Redmi Pad) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
xiaomi violet (Redmi Note 7 Pro) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
samsung a21 (Galaxy A21) 3.0 1.000000 3 0.0 0.000000 100.000000 0.000000
TECNO TECNO-LH6n (TECNO POVA Neo 3) 3.0 1.000000 3 1.0 0.333333 100.000000 33.333333
TECNO TECNO-KA7-GO (SPARK 2) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
sky 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
samsung a20p (Galaxy A20) 1.0 1.000000 1 1.0 1.000000 100.000000 100.000000
OnePlus OP557AL1 (OnePlus Nord 2T 5G) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
OnePlus OnePlus9 (OnePlus 9 5G) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
POCO alioth (POCO F3) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
OnePlus OnePlusNordCE (OnePlus Nord CE 5G) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
POCO redwood (POCO X5 Pro 5G) 4.0 1.000000 4 4.0 1.000000 100.000000 100.000000
samsung c9ltechn (Galaxy C9 Pro) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
samsung d1 (Galaxy Note10) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
samsung crownqltechn (Galaxy Note9) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
HONOR HWKSA-M (HONOR 8S) 2.0 1.000000 2 3.0 1.500000 100.000000 150.000000
KDDI SCV46 (Galaxy A20) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
samsung a23xq (Galaxy A23 5G) 3.0 1.000000 3 0.0 0.000000 100.000000 0.000000
motorola nora_8917 (Moto E (5)) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
motorola sanders_nt (Moto G (5S) Plus) 2.0 1.000000 2 1.0 0.500000 100.000000 50.000000
motorola fijisc (moto e(6i)) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
motorola foles (moto z4) 2.0 1.000000 2 0.0 0.000000 100.000000 0.000000
Infinix Infinix-X6815B (Infinix ZERO 5G) 1.0 1.000000 1 2.0 2.000000 100.000000 200.000000
motorola hawao (moto g42) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
motorola kyoto (motorola edge 20 lite) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
OPPO CPH1909 (CPH1909) 14.0 1.000000 14 23.0 1.642857 100.000000 164.285714
motorola lake_n (moto g(7) plus) 2.0 1.000000 2 1.0 0.500000 100.000000 50.000000
OPPO OP486C (A1k) 8.0 1.000000 8 10.0 1.250000 100.000000 125.000000
samsung a70q (Galaxy A70) 2.0 1.000000 2 0.0 0.000000 100.000000 0.000000
vivo 2126 (V2126) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
Alcatel Seoul (Alcatel 1B) 3.0 1.000000 3 1.0 0.333333 100.000000 33.333333
TECNO TECNO-KG7 (TECNO SPARK 8 P) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
asus ASUS_AI2203 (ROG Phone 6D Ultimate) 1.0 1.000000 1 1.0 1.000000 100.000000 100.000000
TECNO TECNO-LG7n (TECNO POVA 4) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
TECNO TECNO-LF7n (TECNO POVA 3) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
TECNO TECNO-LE6 (POVA Neo) 2.0 1.000000 2 0.0 0.000000 100.000000 0.000000
TECNO TECNO-KJ6 (TECNO SPARK 20 Pro) 2.0 1.000000 2 0.0 0.000000 100.000000 0.000000
TECNO TECNO-KI7 (TECNO SPARK 10 Pro) 8.0 1.000000 8 10.0 1.250000 100.000000 125.000000
Xiaomi olivelite (Redmi 8A) 3.0 1.000000 3 5.0 1.666667 100.000000 166.666667
Xiaomi olive (Redmi 8) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
WIKO W-V673 (T10) 2.0 1.000000 2 0.0 0.000000 100.000000 0.000000
xiaomi ugglite (Redmi Note 5A) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
xiaomi onc (Redmi 7) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
samsung r8s (Galaxy S20 FE) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
Lenovo TB-X505F (Lenovo Tab M10 (HD)) 1.0 1.000000 1 9.0 9.000000 100.000000 900.000000
samsung r0q (Galaxy S22) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
realme RMX2180 (realme C15) 4.0 1.000000 4 0.0 0.000000 100.000000 0.000000
reeder P13_Blue_Pro_2022 (P13_Blue_Pro_2022) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
samsung a03su (Galaxy A03s) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
TECNO TECNO-BC2c (TECNO POP 4 ) 1.0 1.000000 1 1.0 1.000000 100.000000 100.000000
vivo 1907 (vivo 1907) 2.0 1.000000 2 0.0 0.000000 100.000000 0.000000
TECNO TECNO-KI5q (TECNO SPARK 10) 6.0 1.000000 6 18.0 3.000000 100.000000 300.000000
TECNO TECNO-KI5m (TECNO SPARK 10C) 1.0 1.000000 1 1.0 1.000000 100.000000 100.000000
vivo 1718 (vivo 1718) 3.0 1.000000 3 3.0 1.000000 100.000000 100.000000
TECNO TECNO-BE8 (POP 6 Pro) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
TECNO TECNO-CK6n (TECNO CAMON 20 PRO) 2.0 1.000000 2 2.0 1.000000 100.000000 100.000000
TECNO TECNO-CH7n (CAMON 18P) 1.0 1.000000 1 3.0 3.000000 100.000000 300.000000
samsung q5q (Galaxy Z Fold5) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
vivo 1601 (1601) 2.0 1.000000 2 0.0 0.000000 100.000000 0.000000
vivo 1714 (vivo 1714) 2.0 1.000000 2 3.0 1.500000 100.000000 150.000000
TECNO TECNO-CG6j (CAMON 17) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
vivo V2207 (vivo Y22) 7.0 1.000000 7 14.0 2.000000 100.000000 200.000000
ELEVATE G62 (LUNA G62) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
HUAWEI HWFIG-H (FIG-LX1) 2.0 1.000000 2 2.0 1.000000 100.000000 100.000000
asus ASUS_X01A_1 (ZenFone Max M2 (ZB633KL) (WW) / Max M2 (ZB632KL) (IN)) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
asus ASUS_X00LD_3 (ZenFone Live Plus (ZB553KL)) 1.0 1.000000 1 1.0 1.000000 100.000000 100.000000
OPPO OP4FA7L1 (OPPO Reno6 Z 5G) 2.0 1.000000 2 0.0 0.000000 100.000000 0.000000
HUAWEI HWWAS-H (P10 lite) 2.0 1.000000 2 1.0 0.500000 100.000000 50.000000
TCL Cruze (TCL 30E) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
Infinix Infinix-X627STU (Smart 3 Plus) 3.0 1.000000 3 2.0 0.666667 100.000000 66.666667
samsung m04 (Galaxy M04) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
TCL Civic_Plus (TCL 408) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
Multilaser Multilaser_F (Multilaser_F) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
vivo 1915 (vivo 1915) 1.0 1.000000 1 1.0 1.000000 100.000000 100.000000
Nokia DRSA_sprout (Nokia 3.4) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
vivo 1919 (vivo 1919) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
Itel itel-W6503 (itel S16 Pro) 2.0 1.000000 2 0.0 0.000000 100.000000 0.000000
vivo V2342 (Y100) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
samsung a13ve (Galaxy A13) 5.0 1.000000 5 13.0 2.600000 100.000000 260.000000
Doha_TMO 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
samsung m21 (Galaxy M21) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
samsung m32 (Galaxy M32) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
samsung m53x (Galaxy M53 5G) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
OPPO OP5325 (CPH2421) 2.0 1.000000 2 0.0 0.000000 100.000000 0.000000
OPPO OP5335L1 (OPPO Reno8 Pro 5G) 1.0 1.000000 1 1.0 1.000000 100.000000 100.000000
OPPO OP56E1L1 (CPH2455) 2.0 1.000000 2 1.0 0.500000 100.000000 50.000000
OPPO OP56EDL1 (Reno8 T 5G) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
OPPO CPH1901 (A7) 9.0 1.000000 9 9.0 1.000000 100.000000 100.000000
OPPO CPH1859 (realme 1) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
OPPO OP56F5 (A17) 10.0 1.000000 10 10.0 1.000000 100.000000 100.000000
samsung dreamqltesq (Galaxy S8) 2.0 1.000000 2 0.0 0.000000 100.000000 0.000000
OPPO OP5705L1 (Reno10 5G) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
Xiaomi tiare (Redmi Go) 5.0 1.000000 5 6.0 1.200000 100.000000 120.000000
ZTE P963F01 (ZTE Blade V10 Vita) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
EVERCOSS M6 (M6) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
Xiaomi taoyao (Xiaomi 12 Lite) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
lge mcv7a (LG Stylo 4) 1.0 1.000000 1 1.0 1.000000 100.000000 100.000000
lge mcv3 (LG K11) 3.0 1.000000 3 5.0 1.666667 100.000000 166.666667
Itel itel-A571L (A26) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
samsung gtaxllte (Galaxy Tab A (2016)) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
samsung gta8 (Galaxy Tab A8) 2.0 1.000000 2 1.0 0.500000 100.000000 50.000000
Infinix Infinix-X6516 (Infinix SMART 7 HD) 2.0 1.000000 2 0.0 0.000000 100.000000 0.000000
Infinix Infinix-X6511E (SMART 6) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
Nokia DRS_sprout (Nokia 3.4) 3.0 1.000000 3 2.0 0.666667 100.000000 66.666667
OPPO CPH1727 (CPH1727) 2.0 1.000000 2 0.0 0.000000 100.000000 0.000000
OPPO CPH1717 (CPH1717) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
samsung e3q (Galaxy S24 Ultra) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
motorola devon (moto g32) 2.0 1.000000 2 5.0 2.500000 100.000000 250.000000
Infinix Infinix-X680B (Infinix HOT 9 Play) 3.0 1.000000 3 2.0 0.666667 100.000000 66.666667
samsung j5y17ltedx (Galaxy J5 Pro) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
Infinix Infinix-X678B (NOTE 30 Pro) 3.0 1.000000 3 1.0 0.333333 100.000000 33.333333
HUAWEI HWPAR (nova 3) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
vivo 1802 (vivo 1802) 4.0 1.000000 4 7.0 1.750000 100.000000 175.000000
HUAWEI HWPOT-H (HUAWEI P smart 2019) 3.0 1.000000 3 0.0 0.000000 100.000000 0.000000
HUAWEI HWFLA-H (HUAWEI Y9 2018) 2.0 1.000000 2 12.0 6.000000 100.000000 600.000000
Infinix Infinix-X6837 (Infinix HOT 40 Pro) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
Infinix Infinix-X688C (HOT Play) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
OPPO OP52F3L1 (A78 5G) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
lge mmh45lm (K50S) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
Infinix-X6816 3.0 1.000000 3 4.0 1.333333 100.000000 133.333333
motorola def (motorola one hyper) 2.0 1.000000 2 2.0 1.000000 100.000000 100.000000
samsung j7xelte (Galaxy J7(2016)) 2.0 1.000000 2 3.0 1.500000 100.000000 150.000000
Redmi XIG03 (Redmi 12 5G) 3.0 1.000000 3 3.0 1.000000 100.000000 100.000000
xiaomi cereus (Redmi 6) 3.0 1.000000 3 1.0 0.333333 100.000000 33.333333
vivo V2236 (Y02) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
samsung gto (Galaxy Tab A (8.0", 2019)) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
samsung gts7xllite (Galaxy Tab S7 FE) 1.0 1.000000 1 3.0 3.000000 100.000000 300.000000
OP5312L1 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
OPPO OP4EF3L1 (CPH2095) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
OPPO OP4B79L1 (A5 2020) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
OPPO OP4B9EL1 (Reno3) 2.0 1.000000 2 0.0 0.000000 100.000000 0.000000
OPPO OP4883 (F11) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
Itel itel-A16-Plus (itel-A16-Plus) 1.0 1.000000 1 3.0 3.000000 100.000000 300.000000
Infinix Infinix-X689B (Infinix HOT 10S) 2.0 1.000000 2 1.0 0.500000 100.000000 50.000000
motorola sofiar (moto g(8) power) 2.0 1.000000 2 1.0 0.500000 100.000000 50.000000
samsung dm2q (Galaxy S23+) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
vivo 2037 (V2037) 3.0 1.000000 3 2.0 0.666667 100.000000 66.666667
Itel itel-A663LC (A05s) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
Itel itel-A632WM (itel A04) 1.0 1.000000 1 3.0 3.000000 100.000000 300.000000
positivo T810 (T810) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
realme RE549C (realme C31) 3.0 1.000000 3 0.0 0.000000 100.000000 0.000000
Lenovo J606F (Lenovo Tab P11) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
realme RED8AF (realme narzo 50A) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
realme RE58A5L1 (realme 10 Pro+ 5G) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
Pearl_K3_2021 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
Redmi galahad (Redmi 9) 6.0 1.000000 6 6.0 1.000000 100.000000 100.000000
vivo V2248 (Y36 5G) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
vivo 2109 (V2109) 2.0 1.000000 2 0.0 0.000000 100.000000 0.000000
HONOR HNANY-Q1 (HONOR X9) 2.0 1.000000 2 0.0 0.000000 100.000000 0.000000
vivo 2149 (Y21A) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
motorola nio (moto g(100)) 1.0 1.000000 1 2.0 2.000000 100.000000 200.000000
Infinix Infinix-X668C (Infinix HOT 12 PRO) 3.0 1.000000 3 1.0 0.333333 100.000000 33.333333
vivo V2204 (Y16) 6.0 1.000000 6 5.0 0.833333 100.000000 83.333333
HONOR HNVNE-Q (HONOR X8 5G) 1.0 1.000000 1 1.0 1.000000 100.000000 100.000000
HONOR HWCOR (Honor Play) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
Redmi cattail (Redmi 9) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
Blackview BV6200Pro (BV6200 Pro) 1.0 1.000000 1 4.0 4.000000 100.000000 400.000000
HONOR HWDUA-M (Honor 7A) 10.0 1.000000 10 9.0 0.900000 100.000000 90.000000
xiaomi vince (Redmi 5 Plus) 1.0 1.000000 1 0.0 0.000000 100.000000 0.000000
motorola fogos (moto g34 5G) 11.0 0.916667 12 18.0 1.500000 91.666667 150.000000
TECNO-BG7 10.0 0.909091 11 9.0 0.818182 90.909091 81.818182
samsung a04 (Galaxy A04) 8.0 0.888889 9 23.0 2.555556 88.888889 255.555556
TECNO TECNO-CK7n (CAMON 20 Pro) 6.0 0.857143 7 7.0 1.000000 85.714286 100.000000
Redmi dandelion (Redmi 9A) 12.0 0.857143 14 17.0 1.214286 85.714286 121.428571
ZTE P932F50 (ZTE Blade A3 2020) 6.0 0.857143 7 6.0 0.857143 85.714286 85.714286
Infinix Infinix-X6525B (SMRAT 8 PRO) 5.0 0.833333 6 10.0 1.666667 83.333333 166.666667
OPPO CPH1853 (CPH1853) 10.0 0.833333 12 22.0 1.833333 83.333333 183.333333
motorola rhode (moto g52) 10.0 0.833333 12 14.0 1.166667 83.333333 116.666667
Redmi angelica (Redmi 9C) 5.0 0.833333 6 3.0 0.500000 83.333333 50.000000
HUAWEI HWJKM-H (HUAWEI Y9 2019) 4.0 0.800000 5 2.0 0.400000 80.000000 40.000000
OPPO CPH1903 (CPH1903) 8.0 0.800000 10 7.0 0.700000 80.000000 70.000000
vivo V2250 (V29) 4.0 0.800000 5 3.0 0.600000 80.000000 60.000000
samsung a11q (Galaxy A11) 8.0 0.800000 10 19.0 1.900000 80.000000 190.000000
vivo V2254 (Y02t) 4.0 0.800000 5 3.0 0.600000 80.000000 60.000000
OPPO RMX1811 (realme C1) 4.0 0.800000 5 5.0 1.000000 80.000000 100.000000
HONOR HNREA (HONOR 90) 3.0 0.750000 4 1.0 0.250000 75.000000 25.000000
realme RE58C2 (realme C53) 3.0 0.750000 4 1.0 0.250000 75.000000 25.000000
vivo V2249 (Y27) 3.0 0.750000 4 3.0 0.750000 75.000000 75.000000
Redmi sapphire (Redmi Note 13) 3.0 0.750000 4 5.0 1.250000 75.000000 125.000000
samsung a34x (Galaxy A34 5G) 6.0 0.750000 8 14.0 1.750000 75.000000 175.000000
Redmi viva (Redmi Note 11 Pro) 3.0 0.750000 4 9.0 2.250000 75.000000 225.000000
realme RMX3231 (RMX3231) 6.0 0.750000 8 5.0 0.625000 75.000000 62.500000
Itel itel-L6502 (Vision1Pro) 3.0 0.750000 4 1.0 0.250000 75.000000 25.000000
Lenovo X104L (Lenovo Tab E10) 3.0 0.750000 4 7.0 1.750000 75.000000 175.000000
samsung j7y17lte (Galaxy J7) 5.0 0.714286 7 5.0 0.714286 71.428571 71.428571
xiaomi cactus (Redmi 6A) 5.0 0.714286 7 7.0 1.000000 71.428571 100.000000
TECNO TECNO-KG5m (TECNO SPARK Go 2022) 2.0 0.666667 3 2.0 0.666667 66.666667 66.666667
xiaomi mido ( Redmi Note 4) 4.0 0.666667 6 3.0 0.500000 66.666667 50.000000
samsung a22 (Galaxy A22) 2.0 0.666667 3 1.0 0.333333 66.666667 33.333333
TECNO-BD4a 4.0 0.666667 6 7.0 1.166667 66.666667 116.666667
OPPO OP5745L1 (A78) 4.0 0.666667 6 6.0 1.000000 66.666667 100.000000
samsung a31 (Galaxy A31) 2.0 0.666667 3 2.0 0.666667 66.666667 66.666667
samsung r8q (Galaxy S20 FE 5G) 2.0 0.666667 3 1.0 0.333333 66.666667 33.333333
samsung r9s (Galaxy S21 FE 5G) 2.0 0.666667 3 3.0 1.000000 66.666667 100.000000
samsung a25x (Galaxy A25 5G) 2.0 0.666667 3 1.0 0.333333 66.666667 33.333333
HONOR HNNTN (HONOR 50 Lite) 2.0 0.666667 3 3.0 1.000000 66.666667 100.000000
samsung j6lte (Galaxy J6) 4.0 0.666667 6 7.0 1.166667 66.666667 116.666667
Redmi begonia (Redmi Note 8 Pro) 2.0 0.666667 3 1.0 0.333333 66.666667 33.333333
HUAWEI HWMAR (HUAWEI P30 lite) 2.0 0.666667 3 6.0 2.000000 66.666667 200.000000
vivo 2034 (V2068) 2.0 0.666667 3 7.0 2.333333 66.666667 233.333333
DeutscheTelekom Leopard (T Phone ?2023?) 2.0 0.666667 3 1.0 0.333333 66.666667 33.333333
samsung j4lte (Galaxy J4) 4.0 0.666667 6 13.0 2.166667 66.666667 216.666667
motorola evert (moto g(6) plus) 2.0 0.666667 3 3.0 1.000000 66.666667 100.000000
samsung dream2lte (Galaxy S8+) 2.0 0.666667 3 2.0 0.666667 66.666667 66.666667
samsung a14m (Galaxy A14) 5.0 0.625000 8 8.0 1.000000 62.500000 100.000000
Infinix Infinix-X665B (HOT 12i) 3.0 0.600000 5 5.0 1.000000 60.000000 100.000000
realme RE87BAL1 (realme C35) 3.0 0.600000 5 16.0 3.200000 60.000000 320.000000
Redmi lemon (Redmi 9T) 3.0 0.600000 5 3.0 0.600000 60.000000 60.000000
Infinix Infinix-X653C (Smart 4) 3.0 0.600000 5 5.0 1.000000 60.000000 100.000000
HUAWEI HWATU-QG (HUAWEI Y6 2018) 4.0 0.571429 7 7.0 1.000000 57.142857 100.000000
vivo 1817 (vivo 1817) 4.0 0.571429 7 8.0 1.142857 57.142857 114.285714
HUAWEI HWDRA-MG (HUAWEI Y5 lite) 4.0 0.571429 7 3.0 0.428571 57.142857 42.857143
samsung a53x (Galaxy A53 5G) 5.0 0.555556 9 10.0 1.111111 55.555556 111.111111
Xiaomi toco (Mi Note 10 Lite) 1.0 0.500000 2 2.0 1.000000 50.000000 100.000000
HONOR HWSTF (Honor 9) 3.0 0.500000 6 10.0 1.666667 50.000000 166.666667
samsung gta7litewifi (Galaxy Tab A7 Lite) 1.0 0.500000 2 2.0 1.000000 50.000000 100.000000
samsung a73xq (Galaxy A73 5G) 2.0 0.500000 4 4.0 1.000000 50.000000 100.000000
Infinix Infinix-X6823C (SMART 6 PLUS) 1.0 0.500000 2 2.0 1.000000 50.000000 100.000000
OPPO RMX1801 (realme 2 Pro) 1.0 0.500000 2 1.0 0.500000 50.000000 50.000000
Redmi water (Redmi A2+) 1.0 0.500000 2 1.0 0.500000 50.000000 50.000000
Itel itel-A662LM (itel A60s) 1.0 0.500000 2 2.0 1.000000 50.000000 100.000000
Itel itel-W6501 (P36) 1.0 0.500000 2 1.0 0.500000 50.000000 50.000000
Itel itel-A665L (A70) 4.0 0.500000 8 20.0 2.500000 50.000000 250.000000
TECNO TECNO-CH6i (TECNO CAMON 19 Neo) 2.0 0.500000 4 3.0 0.750000 50.000000 75.000000
samsung t2s (Galaxy S21+ 5G) 1.0 0.500000 2 3.0 1.500000 50.000000 150.000000
TECNO TECNO-LH8n (POVA 5 Pro 5G) 2.0 0.500000 4 10.0 2.500000 50.000000 250.000000
TECNO TECNO-KH7n (TECNO SPARK 9 Pro) 1.0 0.500000 2 1.0 0.500000 50.000000 50.000000
realme RMX3063 (RMX3063) 1.0 0.500000 2 1.0 0.500000 50.000000 50.000000
TECNO TECNO-BB4k (SPARK 4 Lite) 1.0 0.500000 2 1.0 0.500000 50.000000 50.000000
vivo 1724 (vivo 1724) 2.0 0.500000 4 9.0 2.250000 50.000000 225.000000
samsung gtaxlwifi (Galaxy Tab A (2016)) 1.0 0.500000 2 1.0 0.500000 50.000000 50.000000
lge meh35lm (K52) 1.0 0.500000 2 1.0 0.500000 50.000000 50.000000
Nokia PDA_sprout (Nokia 5.1 Plus) 1.0 0.500000 2 1.0 0.500000 50.000000 50.000000
motorola vicky (moto g72) 1.0 0.500000 2 4.0 2.000000 50.000000 200.000000
realme RED8C1L1 (realme 9i) 3.0 0.500000 6 4.0 0.666667 50.000000 66.666667
samsung a32 (Galaxy A32) 2.0 0.500000 4 2.0 0.500000 50.000000 50.000000
motorola cedric (Moto G (5th Gen)) 2.0 0.500000 4 6.0 1.500000 50.000000 150.000000
realme RECE4244 (realme C15 Qualcomm Edition) 1.0 0.500000 2 1.0 0.500000 50.000000 50.000000
realme RE8DDCL1 (realme 10) 3.0 0.500000 6 11.0 1.833333 50.000000 183.333333
motorola capri (moto g(10)) 1.0 0.500000 2 8.0 4.000000 50.000000 400.000000
TECNO TECNO-LI9 (TECNO POVA 6 Pro 5G ) 1.0 0.500000 2 1.0 0.500000 50.000000 50.000000
Lenovo X306X (Lenovo Tab M10 HD (2nd Gen)) 1.0 0.500000 2 12.0 6.000000 50.000000 600.000000
xiaomi tissot_sprout (Mi A1) 1.0 0.500000 2 2.0 1.000000 50.000000 100.000000
xiaomi tulip (Redmi Note 6 Pro) 1.0 0.500000 2 1.0 0.500000 50.000000 50.000000
TECNO TECNO-KG5k (TECNO SPARK 8C) 1.0 0.500000 2 3.0 1.500000 50.000000 150.000000
samsung m23xq (Galaxy F23 5G) 2.0 0.500000 4 5.0 1.250000 50.000000 125.000000
samsung dreamlteks (Galaxy S8) 1.0 0.500000 2 4.0 2.000000 50.000000 200.000000
Transformer2 1.0 0.500000 2 1.0 0.500000 50.000000 50.000000
Xiaomi riva (Redmi 5A) 1.0 0.500000 2 1.0 0.500000 50.000000 50.000000
Infinix Infinix-X669 (Infinix HOT 30i) 3.0 0.500000 6 13.0 2.166667 50.000000 216.666667
Redmi selene (Redmi 10 2022) 1.0 0.500000 2 1.0 0.500000 50.000000 50.000000
vivo V2206 (Y22s) 2.0 0.500000 4 4.0 1.000000 50.000000 100.000000
TCL Rio (TCL 403) 1.0 0.500000 2 2.0 1.000000 50.000000 100.000000
Nokia DRD_sprout (Nokia 5.4) 1.0 0.500000 2 3.0 1.500000 50.000000 150.000000
vivo 1723 (vivo 1723) 1.0 0.500000 2 2.0 1.000000 50.000000 100.000000
Sparx Neo_7_Ultra (Neo 7 Ultra) 1.0 0.500000 2 1.0 0.500000 50.000000 50.000000
samsung a71 (Galaxy A71) 4.0 0.444444 9 8.0 0.888889 44.444444 88.888889
TECNO TECNO-KI5k (TECNO SPARK 10C) 3.0 0.428571 7 10.0 1.428571 42.857143 142.857143
OPPO OP5759L1 (A38) 2.0 0.400000 5 3.0 0.600000 40.000000 60.000000
Infinix Infinix-X6835B (Infinix HOT 30 PLAY) 2.0 0.400000 5 4.0 0.800000 40.000000 80.000000
samsung j6primelte (Galaxy J6+) 2.0 0.400000 5 5.0 1.000000 40.000000 100.000000
HUAWEI HWMRD-M1 (HUAWEI Y6 2019) 2.0 0.400000 5 11.0 2.200000 40.000000 220.000000
Nokia PHR (Nokia G50) 1.0 0.333333 3 3.0 1.000000 33.333333 100.000000
Infinix Infinix-X6711 (Infinix NOTE 30 5G) 1.0 0.333333 3 2.0 0.666667 33.333333 66.666667
samsung a52sxq (Galaxy A52s 5G) 1.0 0.333333 3 3.0 1.000000 33.333333 100.000000
Itel itel-S665L (itel S23) 1.0 0.333333 3 4.0 1.333333 33.333333 133.333333
samsung m11q (Galaxy M11) 1.0 0.333333 3 3.0 1.000000 33.333333 100.000000
realme RMX3191 (realme C25) 1.0 0.333333 3 2.0 0.666667 33.333333 66.666667
TECNO-Mobile TECNO-Mobile-KI5q (TECNO SPARK 10) 1.0 0.333333 3 2.0 0.666667 33.333333 66.666667
HUAWEI HWYAL (HUAWEI nova 5T) 1.0 0.333333 3 2.0 0.666667 33.333333 66.666667
realme RE5C94L1 (RMX3998) 1.0 0.333333 3 4.0 1.333333 33.333333 133.333333
samsung j7velte (Galaxy J7 Neo) 1.0 0.333333 3 3.0 1.000000 33.333333 100.000000
Xiaomi rolex (Redmi 4A) 2.0 0.333333 6 6.0 1.000000 33.333333 100.000000
Redmi cloud (Redmi A2) 1.0 0.333333 3 2.0 0.666667 33.333333 66.666667
Redmi secret (Redmi Note 10S) 2.0 0.333333 6 9.0 1.500000 33.333333 150.000000
Infinix Infinix-X657B (SMART) 1.0 0.250000 4 4.0 1.000000 25.000000 100.000000
OPPO OP4F43L1 (F19 Pro) 1.0 0.250000 4 5.0 1.250000 25.000000 125.000000
motorola namath (Moto C) 1.0 0.250000 4 3.0 0.750000 25.000000 75.000000
gm gm5plus_s_sprout (GM5 Plus) 1.0 0.250000 4 3.0 0.750000 25.000000 75.000000
samsung a20 (Galaxy A20) 1.0 0.250000 4 4.0 1.000000 25.000000 100.000000
samsung a22x (Galaxy A22 5G) 1.0 0.200000 5 8.0 1.600000 20.000000 160.000000
BT-840 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
CUBOT KINGKONGMINI3 (KINGKONG MINI 3) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
Cricket U380AC (Cricket Debut S2) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
vivo V2231 (V27) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
xiaomi ugg (Redmi Note 5A) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
samsung a5xelte (Galaxy A5(2016)) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
samsung a6plte (Galaxy A6+) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
samsung a72q (Galaxy A72) 0.0 0.000000 3 5.0 1.666667 0.000000 166.666667
samsung beyond2 (Galaxy S10+) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
samsung d2s (Galaxy Note10+) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
samsung a50s (Galaxy A50s) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
samsung a2corelte (Galaxy A2 Core) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
HONOR HWBND-H (Honor 7X) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
realme RMX1971 (realme 5pro) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
realme RMX2002L1 (realme 6) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
realme RMX2040 (RMX2040) 0.0 0.000000 1 2.0 2.000000 0.000000 200.000000
realme RMX2189 (realme C12) 0.0 0.000000 2 5.0 2.500000 0.000000 250.000000
realme RMX2202L1 (realme GT 5G) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
samsung g0s (Galaxy S22+) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
samsung gta4xlvewifi (Galaxy Tab S6 Lite) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
samsung gta9pwifi (Galaxy Tab A9+) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
realme RE5C42 (realme C65) 0.0 0.000000 1 2.0 2.000000 0.000000 200.000000
realme RE5C6CL1 (realme 11 5G) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
realme RED8D7 (RMX3268) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
motorola tonga (moto g power (2022)) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
motorola tundra (motorola edge 30 fusion) 0.0 0.000000 2 2.0 1.000000 0.000000 100.000000
realme RE54ABL1 (realme GT Master Edition 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
realme RE54CBL1 (realme 9Pro 5G) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
motorola nairo (motorola one 5G UW) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
motorola rhodep (moto g82 5G) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
motorola guam (moto e(7) plus) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
lge mmh55lm (LG K40S) 0.0 0.000000 2 4.0 2.000000 0.000000 200.000000
lge mmh6lm (LM-X120) 0.0 0.000000 3 5.0 1.666667 0.000000 166.666667
motorola aruba (moto e20) 0.0 0.000000 4 4.0 1.000000 0.000000 100.000000
samsung gts3llte (Galaxy Tab S3) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
samsung herolte (Galaxy S7 ) 0.0 0.000000 2 2.0 1.000000 0.000000 100.000000
samsung j2corey20lte (Galaxy J2 Core) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
lge mdh40lm (K61) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
HUAWEI HWAGS2 (HUAWEI MediaPad T5) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
HUAWEI HWBG2 (MediaPad T3 7) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
HUAWEI HWDRA-M (HUAWEI Y5 Prime 2018) 0.0 0.000000 2 2.0 1.000000 0.000000 100.000000
asus ASUS_X01BD_2 (ZenFone Max Pro M2 (ZB631KL) (WW) / Max Pro M2 (ZB630KL) (IN)) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
docomo SO-02J (Xperia X Compact) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
frost 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
lge elsa (V20) 0.0 0.000000 1 3.0 3.000000 0.000000 300.000000
lge mdh10xlm (LG Harmony 4) 0.0 0.000000 1 2.0 2.000000 0.000000 200.000000
Xiaomi renoir (Mi 11 Lite 5G) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
asus ASUS_X00LD_1 (ZenFone 4 Selfie (ZD553KL)) 0.0 0.000000 1 2.0 2.000000 0.000000 200.000000
asus ASUS_X00T_2 (ZenFone Max Pro M1 (ZB602KL) (WW) / Max Pro M1 (ZB601KL) (IN)) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
samsung jadelte (Galaxy J7+) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
samsung m22 (Galaxy M22) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
samsung m33x (Galaxy Jump2) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
Teclast P85T (P85T_ROW) 0.0 0.000000 1 6.0 6.000000 0.000000 600.000000
WIKO W-K130 (Sunny4) 0.0 0.000000 1 3.0 3.000000 0.000000 300.000000
Xiaomi davinci (Redmi K20) 0.0 0.000000 1 3.0 3.000000 0.000000 300.000000
Xiaomi laurel_sprout (Mi A3) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
TECNO TECNO-KJ5 (TECNO SPARK 20) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
TECNO TECNO-LC7 ( Pouvoir 4) 0.0 0.000000 2 2.0 1.000000 0.000000 100.000000
TECNO TECNO-CK8n (CAMON 20 Pro 5G ) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
TECNO TECNO-CL6 (TECNO CAMON 30) 0.0 0.000000 1 2.0 2.000000 0.000000 200.000000
TECNO TECNO-KF6p (TECNO SPARK 7T) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
TECNO TECNO-KG6 (TECNO SPARK 8) 0.0 0.000000 1 2.0 2.000000 0.000000 200.000000
TECNO TECNO-BC2 (TECNO POP 4) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
TECNO TECNO-BD2d (TECNO POP 5) 0.0 0.000000 2 4.0 2.000000 0.000000 200.000000
TECNO TECNO-CD6 (CAMON 15 Air) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
TECNO TECNO-CH6n (CAMON 18) 0.0 0.000000 1 2.0 2.000000 0.000000 200.000000
samsung p3s (Galaxy S21 Ultra 5G) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
samsung star2qltesq (Galaxy S9+) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
samsung starqltechn (Galaxy S9) 0.0 0.000000 1 3.0 3.000000 0.000000 300.000000
samsung zerolte (Galaxy S6 edge) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
Redmi zircon (Redmi Note 13 Pro+ 5G) 0.0 0.000000 2 2.0 1.000000 0.000000 100.000000
SKYDevices SKY_PADMaxNAL (SKY PAD10Max) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
Sony G8231 (Xperia XZs) 0.0 0.000000 1 5.0 5.000000 0.000000 500.000000
TCL U3A_PLUS_4G (GO5c) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
Infinix Infinix-X6511 (SMART 5 HD) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
Redmi sapphiren (Redmi Note 13) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
Redmi rain (Redmi 10C) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
Redmi rosemary (Redmi Note 10S) 0.0 0.000000 3 3.0 1.000000 0.000000 100.000000
HYUNDAI E553 (E553) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
Redmi light (Redmi 11 Prime 5G) 0.0 0.000000 1 2.0 2.000000 0.000000 200.000000
POCO xaga (POCO X4 GT) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
Philco HIT_P10a (HIT P10a) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
Philco Hit_P12 (Hit P12) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
POCO chopin (Redmi Note 10 Pro) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
POCO ingres (POCO F4 GT) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
POCO marble (POCO F5) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
OPPO OP571DL1 (CPH2385) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
OPPO OP5ABFL1 (Reno 11) 0.0 0.000000 2 5.0 2.500000 0.000000 250.000000
OPPO OP4F11L1 (CPH2219) 0.0 0.000000 3 4.0 1.333333 0.000000 133.333333
OPPO OP5637L1 (CPH2473) 0.0 0.000000 2 2.0 1.000000 0.000000 100.000000
OPPO OP4863 (F11 Pro) 0.0 0.000000 1 9.0 9.000000 0.000000 900.000000
Infinix Infinix-X665C (HOT 20i) 0.0 0.000000 1 2.0 2.000000 0.000000 200.000000
OPPO A73t (A73t) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
OPPO CPH1729 (CPH1729) 0.0 0.000000 4 4.0 1.000000 0.000000 100.000000
Nokia HKEA (Nokia C22) 0.0 0.000000 1 2.0 2.000000 0.000000 200.000000
KDDI SOV37 (Xperia XZ2) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
Lenovo P2a42 (P2) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
Movitel M8424 (M8424) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
Nokia DMN (Nokia C2 2nd Edition) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
GM G310 (GM 21 ) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
GM G312 (GM 22 ) 0.0 0.000000 1 2.0 2.000000 0.000000 200.000000
Itel itel-A666L (itel P55) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
Itel itel-L6005 (itel Vision1(P36 Play)) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
Itel itel-W5005P (P15) 0.0 0.000000 1 3.0 3.000000 0.000000 300.000000
Infinix Infinix-X6835 (Infinix HOT 30 PLAY) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
Infinix Infinix-X6850 (Infinix NOTE 40 Pro) 0.0 0.000000 1 3.0 3.000000 0.000000 300.000000
Infinix Infinix-X6851B (NOTE 40 Pro+ 5G) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
Infinix Infinix-X687 (Infinix ZERO8) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
Infinix Infinix-X689F (HOT) 0.0 0.000000 1 8.0 8.000000 0.000000 800.000000
Infinix Infinix-X692 (NOTE 8) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
Infinix Infinix-X676C (Infinix NOTE 12 2023) 0.0 0.000000 2 2.0 1.000000 0.000000 100.000000
Infinix Infinix-X6825 (HOT 20 PLAY) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
vivo V2315 (Y28 5G) 0.0 0.000000 2 8.0 4.000000 0.000000 400.000000
HONOR HNNTH (HONOR 50) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
vivo 2036 (V2031) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
vivo 2110 (V2152) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
vivo 2116 (V2117) 0.0 0.000000 1 1.0 1.000000 0.000000 100.000000
vivo 2131 (V2140) 0.0 0.000000 1 2.0 2.000000 0.000000 200.000000
✓ Chart saved: batch2_crash_rate_by_device.png
======================================================================
BATCH 2 COMPLETE - Crash Rate Analysis Done
======================================================================
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Set style
sns.set_style("whitegrid")
plt.rcParams['figure.dpi'] = 300
# Load and prepare data
df = pd.read_csv(r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\Combined_App_Performance.csv")
df.columns = df.columns.str.strip()
# Find columns
date_col = [col for col in df.columns if 'date' in col.lower()][0]
device_col = [col for col in df.columns if 'device' in col.lower()][0] if any('device' in col.lower() for col in df.columns) else None
# Keep required columns
columns_to_keep = ['Daily ANRs', 'Android OS Version', 'Daily Crashes', date_col]
if device_col:
columns_to_keep.append(device_col)
df = df[columns_to_keep].copy()
# Clean data
df[date_col] = pd.to_datetime(df[date_col], errors='coerce')
df = df.rename(columns={date_col: 'Date'})
df['Daily Crashes'] = pd.to_numeric(df['Daily Crashes'], errors='coerce')
df['Daily ANRs'] = pd.to_numeric(df['Daily ANRs'], errors='coerce')
df = df.dropna(subset=['Date', 'Daily Crashes', 'Daily ANRs'])
if device_col:
df = df.rename(columns={device_col: 'Device'})
# Calculate OS metrics
os_metrics = df.groupby('Android OS Version').agg({
'Daily Crashes': ['sum', 'mean', 'count'],
'Daily ANRs': ['sum', 'mean']
}).reset_index()
os_metrics.columns = ['OS_Version', 'Total_Crashes', 'Avg_Crashes', 'Record_Count',
'Total_ANRs', 'Avg_ANRs']
os_metrics['Crash_Rate'] = (os_metrics['Total_Crashes'] / os_metrics['Record_Count']) * 100
os_metrics['ANR_Rate'] = (os_metrics['Total_ANRs'] / os_metrics['Record_Count']) * 100
os_metrics = os_metrics.sort_values('Crash_Rate', ascending=False)
# PLOT 1: Crash Rate by OS Version
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
# Crash Rate
bars1 = axes[0].barh(range(len(os_metrics)), os_metrics['Crash_Rate'],
color='#3498db', alpha=0.8, edgecolor='black')
for i in range(min(3, len(bars1))):
bars1[i].set_color('#e74c3c')
bars1[i].set_alpha(1.0)
axes[0].set_yticks(range(len(os_metrics)))
axes[0].set_yticklabels(os_metrics['OS_Version'], fontsize=10)
axes[0].set_xlabel('Crash Rate (% of records)', fontsize=12, fontweight='bold')
axes[0].set_title('Crash Rate by Android OS Version', fontsize=13, fontweight='bold')
axes[0].grid(True, alpha=0.3, axis='x')
axes[0].invert_yaxis()
for i, v in enumerate(os_metrics['Crash_Rate']):
axes[0].text(v + 0.5, i, f'{v:.1f}%', va='center', fontsize=9, fontweight='bold')
# ANR Rate
bars2 = axes[1].barh(range(len(os_metrics)), os_metrics['ANR_Rate'],
color='#9b59b6', alpha=0.8, edgecolor='black')
for i in range(min(3, len(bars2))):
bars2[i].set_color('#f39c12')
bars2[i].set_alpha(1.0)
axes[1].set_yticks(range(len(os_metrics)))
axes[1].set_yticklabels(os_metrics['OS_Version'], fontsize=10)
axes[1].set_xlabel('ANR Rate (% of records)', fontsize=12, fontweight='bold')
axes[1].set_title('ANR Rate by Android OS Version', fontsize=13, fontweight='bold')
axes[1].grid(True, alpha=0.3, axis='x')
axes[1].invert_yaxis()
for i, v in enumerate(os_metrics['ANR_Rate']):
axes[1].text(v + 0.5, i, f'{v:.1f}%', va='center', fontsize=9, fontweight='bold')
plt.tight_layout()
plt.savefig('batch2_crash_rate_by_os.png', dpi=300, bbox_inches='tight')
plt.show()
print("✓ Chart saved: batch2_crash_rate_by_os.png")
# PLOT 2: Crash Rate by Device (if available)
if 'Device' in df.columns:
device_metrics = df.groupby('Device').agg({
'Daily Crashes': ['sum', 'mean', 'count'],
'Daily ANRs': ['sum', 'mean']
}).reset_index()
device_metrics.columns = ['Device', 'Total_Crashes', 'Avg_Crashes', 'Record_Count',
'Total_ANRs', 'Avg_ANRs']
device_metrics['Crash_Rate'] = (device_metrics['Total_Crashes'] / device_metrics['Record_Count']) * 100
device_metrics['ANR_Rate'] = (device_metrics['Total_ANRs'] / device_metrics['Record_Count']) * 100
device_metrics = device_metrics.sort_values('Crash_Rate', ascending=False)
top_devices = device_metrics.head(15)
fig, axes = plt.subplots(1, 2, figsize=(16, 8))
# Crash Rate
bars1 = axes[0].barh(range(len(top_devices)), top_devices['Crash_Rate'],
color='#3498db', alpha=0.8, edgecolor='black')
for i in range(min(5, len(bars1))):
bars1[i].set_color('#e74c3c')
bars1[i].set_alpha(1.0)
axes[0].set_yticks(range(len(top_devices)))
axes[0].set_yticklabels(top_devices['Device'], fontsize=9)
axes[0].set_xlabel('Crash Rate (% of records)', fontsize=12, fontweight='bold')
axes[0].set_title('Top 15 Devices by Crash Rate', fontsize=13, fontweight='bold')
axes[0].grid(True, alpha=0.3, axis='x')
axes[0].invert_yaxis()
# ANR Rate
bars2 = axes[1].barh(range(len(top_devices)), top_devices['ANR_Rate'],
color='#9b59b6', alpha=0.8, edgecolor='black')
for i in range(min(5, len(bars2))):
bars2[i].set_color('#f39c12')
bars2[i].set_alpha(1.0)
axes[1].set_yticks(range(len(top_devices)))
axes[1].set_yticklabels(top_devices['Device'], fontsize=9)
axes[1].set_xlabel('ANR Rate (% of records)', fontsize=12, fontweight='bold')
axes[1].set_title('Top 15 Devices by ANR Rate', fontsize=13, fontweight='bold')
axes[1].grid(True, alpha=0.3, axis='x')
axes[1].invert_yaxis()
plt.tight_layout()
plt.savefig('batch2_crash_rate_by_device.png', dpi=300, bbox_inches='tight')
plt.show()
print("✓ Chart saved: batch2_crash_rate_by_device.png")
✓ Chart saved: batch2_crash_rate_by_os.png
✓ Chart saved: batch2_crash_rate_by_device.png
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
# Set style
sns.set_style("whitegrid")
plt.rcParams['figure.dpi'] = 300
print("="*70)
print("BATCH 3: TOP OFFENDING OS VERSIONS & DEVICES")
print("="*70)
# Load the data
df = pd.read_csv(r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\Combined_App_Performance.csv")
df.columns = df.columns.str.strip()
# Find columns
date_col = [col for col in df.columns if 'date' in col.lower()][0]
device_col = [col for col in df.columns if 'device' in col.lower()][0] if any('device' in col.lower() for col in df.columns) else None
# Keep only required columns
columns_to_keep = ['Daily ANRs', 'Android OS Version', 'Daily Crashes', date_col]
if device_col:
columns_to_keep.append(device_col)
df = df[columns_to_keep].copy()
# Clean data
df[date_col] = pd.to_datetime(df[date_col], errors='coerce')
df = df.rename(columns={date_col: 'Date'})
df['Daily Crashes'] = pd.to_numeric(df['Daily Crashes'], errors='coerce')
df['Daily ANRs'] = pd.to_numeric(df['Daily ANRs'], errors='coerce')
df = df.dropna(subset=['Date', 'Daily Crashes', 'Daily ANRs'])
if device_col:
df = df.rename(columns={device_col: 'Device'})
print(f"Dataset loaded: {len(df):,} records")
# ===========================================================================
# 4. TOP OFFENDING ANDROID OS VERSIONS
# ===========================================================================
print("\n4. TOP OFFENDING ANDROID OS VERSIONS")
print("-" * 70)
os_detailed = df.groupby('Android OS Version').agg({
'Daily Crashes': ['sum', 'mean', 'count'],
'Daily ANRs': ['sum', 'mean']
}).reset_index()
os_detailed.columns = ['OS_Version', 'Total_Crashes', 'Avg_Crashes', 'Record_Count',
'Total_ANRs', 'Avg_ANRs']
os_detailed['Crash_Rate'] = (os_detailed['Total_Crashes'] / os_detailed['Record_Count']) * 100
# A. Top by ABSOLUTE crash count
top_absolute_os = os_detailed.nlargest(10, 'Total_Crashes')
print("\n TOP 10 OS VERSIONS BY ABSOLUTE CRASH COUNT:")
print(top_absolute_os[['OS_Version', 'Total_Crashes', 'Total_ANRs', 'Record_Count', 'Crash_Rate']].to_string(index=False))
# B. Top by RELATIVE crash rate (minimum 10 records)
top_relative_os = os_detailed[os_detailed['Record_Count'] >= 10].nlargest(10, 'Crash_Rate')
print("\n\n TOP 10 OS VERSIONS BY RELATIVE CRASH RATE (min 10 records):")
print(top_relative_os[['OS_Version', 'Crash_Rate', 'Total_Crashes', 'Record_Count']].to_string(index=False))
# Visualization - Top Offending OS Versions
fig, axes = plt.subplots(1, 2, figsize=(16, 8))
# Absolute crashes
colors_abs = ['#e74c3c' if i < 3 else '#3498db' for i in range(len(top_absolute_os))]
bars1 = axes[0].barh(range(len(top_absolute_os)), top_absolute_os['Total_Crashes'],
color=colors_abs, alpha=0.8, edgecolor='black')
axes[0].set_yticks(range(len(top_absolute_os)))
axes[0].set_yticklabels(top_absolute_os['OS_Version'], fontsize=10)
axes[0].set_xlabel('Total Crashes', fontsize=12, fontweight='bold')
axes[0].set_title('Top 10 OS: Highest Absolute Crashes', fontsize=13, fontweight='bold')
axes[0].invert_yaxis()
axes[0].grid(True, alpha=0.3, axis='x')
# Add value labels
for i, v in enumerate(top_absolute_os['Total_Crashes']):
axes[0].text(v + (v*0.02), i, f'{v:,.0f}', va='center', fontsize=9, fontweight='bold')
# Relative crash rate
colors_rel = ['#e74c3c' if i < 3 else '#f39c12' for i in range(len(top_relative_os))]
bars2 = axes[1].barh(range(len(top_relative_os)), top_relative_os['Crash_Rate'],
color=colors_rel, alpha=0.8, edgecolor='black')
axes[1].set_yticks(range(len(top_relative_os)))
axes[1].set_yticklabels(top_relative_os['OS_Version'], fontsize=10)
axes[1].set_xlabel('Crash Rate (%)', fontsize=12, fontweight='bold')
axes[1].set_title('Top 10 OS: Highest Relative Crash Rate', fontsize=13, fontweight='bold')
axes[1].invert_yaxis()
axes[1].grid(True, alpha=0.3, axis='x')
# Add value labels
for i, v in enumerate(top_relative_os['Crash_Rate']):
axes[1].text(v + (v*0.02), i, f'{v:.1f}%', va='center', fontsize=9, fontweight='bold')
plt.tight_layout()
plt.savefig('batch3_top_offending_os.png', dpi=300, bbox_inches='tight')
print("\n✓ Chart saved: batch3_top_offending_os.png")
plt.close()
# ===========================================================================
# 5. TOP OFFENDING DEVICES (if Device column exists)
# ===========================================================================
if 'Device' in df.columns:
print("\n\n5. TOP OFFENDING DEVICES")
print("-" * 70)
device_detailed = df.groupby('Device').agg({
'Daily Crashes': ['sum', 'mean', 'count'],
'Daily ANRs': ['sum', 'mean']
}).reset_index()
device_detailed.columns = ['Device', 'Total_Crashes', 'Avg_Crashes', 'Record_Count',
'Total_ANRs', 'Avg_ANRs']
device_detailed['Crash_Rate'] = (device_detailed['Total_Crashes'] / device_detailed['Record_Count']) * 100
# A. Top by ABSOLUTE crash count
top_absolute_device = device_detailed.nlargest(15, 'Total_Crashes')
print("\n📊 TOP 15 DEVICES BY ABSOLUTE CRASH COUNT:")
print(top_absolute_device[['Device', 'Total_Crashes', 'Total_ANRs', 'Record_Count', 'Crash_Rate']].to_string(index=False))
# B. Top by RELATIVE crash rate (minimum 10 records)
top_relative_device = device_detailed[device_detailed['Record_Count'] >= 10].nlargest(15, 'Crash_Rate')
print("\n\n📊 TOP 15 DEVICES BY RELATIVE CRASH RATE (min 10 records):")
print(top_relative_device[['Device', 'Crash_Rate', 'Total_Crashes', 'Record_Count']].to_string(index=False))
# Visualization - Top Offending Devices
fig, axes = plt.subplots(1, 2, figsize=(16, 10))
# Absolute crashes
colors_abs = ['#e74c3c' if i < 5 else '#3498db' for i in range(len(top_absolute_device))]
bars1 = axes[0].barh(range(len(top_absolute_device)), top_absolute_device['Total_Crashes'],
color=colors_abs, alpha=0.8, edgecolor='black')
axes[0].set_yticks(range(len(top_absolute_device)))
axes[0].set_yticklabels(top_absolute_device['Device'], fontsize=9)
axes[0].set_xlabel('Total Crashes', fontsize=12, fontweight='bold')
axes[0].set_title('Top 15 Devices: Highest Absolute Crashes', fontsize=13, fontweight='bold')
axes[0].invert_yaxis()
axes[0].grid(True, alpha=0.3, axis='x')
# Add value labels
for i, v in enumerate(top_absolute_device['Total_Crashes']):
axes[0].text(v + (v*0.02), i, f'{v:,.0f}', va='center', fontsize=8, fontweight='bold')
# Relative crash rate
colors_rel = ['#e74c3c' if i < 5 else '#f39c12' for i in range(len(top_relative_device))]
bars2 = axes[1].barh(range(len(top_relative_device)), top_relative_device['Crash_Rate'],
color=colors_rel, alpha=0.8, edgecolor='black')
axes[1].set_yticks(range(len(top_relative_device)))
axes[1].set_yticklabels(top_relative_device['Device'], fontsize=9)
axes[1].set_xlabel('Crash Rate (%)', fontsize=12, fontweight='bold')
axes[1].set_title('Top 15 Devices: Highest Relative Crash Rate', fontsize=13, fontweight='bold')
axes[1].invert_yaxis()
axes[1].grid(True, alpha=0.3, axis='x')
# Add value labels
for i, v in enumerate(top_relative_device['Crash_Rate']):
axes[1].text(v + (v*0.02), i, f'{v:.1f}%', va='center', fontsize=8, fontweight='bold')
plt.tight_layout()
plt.savefig('batch3_top_offending_devices.png', dpi=300, bbox_inches='tight')
print("\n✓ Chart saved: batch3_top_offending_devices.png")
plt.close()
else:
print("\n\n5. DEVICE ANALYSIS SKIPPED")
print("-" * 70)
print("No Device column found in dataset.")
# Summary of top offenders
print("\n\n" + "="*70)
print("SUMMARY: KEY OFFENDERS IDENTIFIED")
print("="*70)
print(f"\n WORST OS VERSION (Absolute): {top_absolute_os.iloc[0]['OS_Version']}")
print(f" Total Crashes: {top_absolute_os.iloc[0]['Total_Crashes']:,.0f}")
print(f" Crash Rate: {top_absolute_os.iloc[0]['Crash_Rate']:.2f}%")
print(f"\n WORST OS VERSION (Relative): {top_relative_os.iloc[0]['OS_Version']}")
print(f" Crash Rate: {top_relative_os.iloc[0]['Crash_Rate']:.2f}%")
print(f" Total Crashes: {top_relative_os.iloc[0]['Total_Crashes']:,.0f}")
if 'Device' in df.columns:
print(f"\n WORST DEVICE (Absolute): {top_absolute_device.iloc[0]['Device']}")
print(f" Total Crashes: {top_absolute_device.iloc[0]['Total_Crashes']:,.0f}")
======================================================================
BATCH 3: TOP OFFENDING OS VERSIONS & DEVICES
======================================================================
Dataset loaded: 3,391 records
4. TOP OFFENDING ANDROID OS VERSIONS
----------------------------------------------------------------------
TOP 10 OS VERSIONS BY ABSOLUTE CRASH COUNT:
OS_Version Total_Crashes Total_ANRs Record_Count Crash_Rate
Android 14 1076.0 4176.0 28 3842.857143
Android 9 679.0 182.0 28 2425.000000
Android 13 381.0 325.0 28 1360.714286
Android 12 290.0 562.0 28 1035.714286
Android 11 238.0 375.0 28 850.000000
Android 10 195.0 330.0 28 696.428571
Android 8.1 135.0 243.0 28 482.142857
Android 7.1 93.0 64.0 23 404.347826
Android 8.0 49.0 21.0 22 222.727273
Android 15 44.0 180.0 28 157.142857
TOP 10 OS VERSIONS BY RELATIVE CRASH RATE (min 10 records):
OS_Version Crash_Rate Total_Crashes Record_Count
Android 14 3842.857143 1076.0 28
Android 9 2425.000000 679.0 28
Android 13 1360.714286 381.0 28
Android 12 1035.714286 290.0 28
Android 11 850.000000 238.0 28
Android 10 696.428571 195.0 28
Android 8.1 482.142857 135.0 28
Android 7.1 404.347826 93.0 23
Android 8.0 222.727273 49.0 22
Android 15 157.142857 44.0 28
✓ Chart saved: batch3_top_offending_os.png
5. TOP OFFENDING DEVICES
----------------------------------------------------------------------
📊 TOP 15 DEVICES BY ABSOLUTE CRASH COUNT:
Device Total_Crashes Total_ANRs Record_Count Crash_Rate
Redmi fire (Redmi 12) 98.0 202.0 13 753.846154
BLU B130DL (B131DL) 86.0 323.0 8 1075.000000
samsung j4primelte (Galaxy J4+) 85.0 163.0 11 772.727273
motorola cancun (moto g14) 79.0 154.0 5 1580.000000
POCO vayu (POCO X3 Pro) 66.0 161.0 5 1320.000000
motorola malta (moto e(7)) 64.0 138.0 13 492.307692
Redmi miel (Redmi Note 11S) 63.0 185.0 2 3150.000000
HONOR HWLLD-H (Honor 9 Lite) 62.0 165.0 10 620.000000
Redmi topaz (Redmi Note 12) 61.0 33.0 11 554.545455
Redmi tapas (Redmi Note 12) 58.0 61.0 14 414.285714
vivo 1904 (vivo 1904) 58.0 20.0 13 446.153846
samsung a10 (Galaxy A10) 57.0 157.0 5 1140.000000
vivo 2027 (V2029) 55.0 166.0 5 1100.000000
POCO duchamp (POCO X6 Pro 5G) 52.0 18.0 6 866.666667
vivo V2333 (Y03) 51.0 171.0 13 392.307692
📊 TOP 15 DEVICES BY RELATIVE CRASH RATE (min 10 records):
Device Crash_Rate Total_Crashes Record_Count
samsung j4primelte (Galaxy J4+) 772.727273 85.0 11
Redmi fire (Redmi 12) 753.846154 98.0 13
HONOR HWLLD-H (Honor 9 Lite) 620.000000 62.0 10
Redmi topaz (Redmi Note 12) 554.545455 61.0 11
motorola malta (moto e(7)) 492.307692 64.0 13
vivo 1904 (vivo 1904) 446.153846 58.0 13
Redmi tapas (Redmi Note 12) 414.285714 58.0 14
samsung j2corelte (Galaxy J2 Core) 400.000000 40.0 10
vivo V2333 (Y03) 392.307692 51.0 13
motorola bali (moto e6 play) 358.333333 43.0 12
Infinix Infinix-X688B (HOT Play) 350.000000 35.0 10
samsung a14x (Galaxy A14 5G) 342.857143 48.0 14
samsung a05m (Galaxy A05) 320.000000 48.0 15
samsung a14 (Galaxy A14) 320.000000 48.0 15
samsung j4corelte (Galaxy J4 Core) 308.333333 37.0 12
✓ Chart saved: batch3_top_offending_devices.png
======================================================================
SUMMARY: KEY OFFENDERS IDENTIFIED
======================================================================
WORST OS VERSION (Absolute): Android 14
Total Crashes: 1,076
Crash Rate: 3842.86%
WORST OS VERSION (Relative): Android 14
Crash Rate: 3842.86%
Total Crashes: 1,076
WORST DEVICE (Absolute): Redmi fire (Redmi 12)
Total Crashes: 98
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Set style
sns.set_style("whitegrid")
plt.rcParams['figure.dpi'] = 300
# Load and prepare data
df = pd.read_csv(r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\Combined_App_Performance.csv")
df.columns = df.columns.str.strip()
# Find columns
date_col = [col for col in df.columns if 'date' in col.lower()][0]
device_col = [col for col in df.columns if 'device' in col.lower()][0] if any('device' in col.lower() for col in df.columns) else None
# Keep required columns
columns_to_keep = ['Daily ANRs', 'Android OS Version', 'Daily Crashes', date_col]
if device_col:
columns_to_keep.append(device_col)
df = df[columns_to_keep].copy()
# Clean data
df[date_col] = pd.to_datetime(df[date_col], errors='coerce')
df = df.rename(columns={date_col: 'Date'})
df['Daily Crashes'] = pd.to_numeric(df['Daily Crashes'], errors='coerce')
df['Daily ANRs'] = pd.to_numeric(df['Daily ANRs'], errors='coerce')
df = df.dropna(subset=['Date', 'Daily Crashes', 'Daily ANRs'])
if device_col:
df = df.rename(columns={device_col: 'Device'})
# Calculate OS detailed metrics
os_detailed = df.groupby('Android OS Version').agg({
'Daily Crashes': ['sum', 'mean', 'count'],
'Daily ANRs': ['sum', 'mean']
}).reset_index()
os_detailed.columns = ['OS_Version', 'Total_Crashes', 'Avg_Crashes', 'Record_Count',
'Total_ANRs', 'Avg_ANRs']
os_detailed['Crash_Rate'] = (os_detailed['Total_Crashes'] / os_detailed['Record_Count']) * 100
# Top OS by absolute and relative
top_absolute_os = os_detailed.nlargest(10, 'Total_Crashes')
top_relative_os = os_detailed[os_detailed['Record_Count'] >= 10].nlargest(10, 'Crash_Rate')
# PLOT 1: Top Offending OS Versions
fig, axes = plt.subplots(1, 2, figsize=(16, 8))
# Absolute crashes
colors_abs = ['#e74c3c' if i < 3 else '#3498db' for i in range(len(top_absolute_os))]
bars1 = axes[0].barh(range(len(top_absolute_os)), top_absolute_os['Total_Crashes'],
color=colors_abs, alpha=0.8, edgecolor='black')
axes[0].set_yticks(range(len(top_absolute_os)))
axes[0].set_yticklabels(top_absolute_os['OS_Version'], fontsize=10)
axes[0].set_xlabel('Total Crashes', fontsize=12, fontweight='bold')
axes[0].set_title('Top 10 OS: Highest Absolute Crashes', fontsize=13, fontweight='bold')
axes[0].invert_yaxis()
axes[0].grid(True, alpha=0.3, axis='x')
for i, v in enumerate(top_absolute_os['Total_Crashes']):
axes[0].text(v + (v*0.02), i, f'{v:,.0f}', va='center', fontsize=9, fontweight='bold')
# Relative crash rate
colors_rel = ['#e74c3c' if i < 3 else '#f39c12' for i in range(len(top_relative_os))]
bars2 = axes[1].barh(range(len(top_relative_os)), top_relative_os['Crash_Rate'],
color=colors_rel, alpha=0.8, edgecolor='black')
axes[1].set_yticks(range(len(top_relative_os)))
axes[1].set_yticklabels(top_relative_os['OS_Version'], fontsize=10)
axes[1].set_xlabel('Crash Rate (%)', fontsize=12, fontweight='bold')
axes[1].set_title('Top 10 OS: Highest Relative Crash Rate', fontsize=13, fontweight='bold')
axes[1].invert_yaxis()
axes[1].grid(True, alpha=0.3, axis='x')
for i, v in enumerate(top_relative_os['Crash_Rate']):
axes[1].text(v + (v*0.02), i, f'{v:.1f}%', va='center', fontsize=9, fontweight='bold')
plt.tight_layout()
plt.savefig('batch3_top_offending_os.png', dpi=300, bbox_inches='tight')
plt.show()
print("✓ Chart saved: batch3_top_offending_os.png")
# PLOT 2: Top Offending Devices (if available)
if 'Device' in df.columns:
device_detailed = df.groupby('Device').agg({
'Daily Crashes': ['sum', 'mean', 'count'],
'Daily ANRs': ['sum', 'mean']
}).reset_index()
device_detailed.columns = ['Device', 'Total_Crashes', 'Avg_Crashes', 'Record_Count',
'Total_ANRs', 'Avg_ANRs']
device_detailed['Crash_Rate'] = (device_detailed['Total_Crashes'] / device_detailed['Record_Count']) * 100
top_absolute_device = device_detailed.nlargest(15, 'Total_Crashes')
top_relative_device = device_detailed[device_detailed['Record_Count'] >= 10].nlargest(15, 'Crash_Rate')
fig, axes = plt.subplots(1, 2, figsize=(16, 10))
# Absolute crashes
colors_abs = ['#e74c3c' if i < 5 else '#3498db' for i in range(len(top_absolute_device))]
bars1 = axes[0].barh(range(len(top_absolute_device)), top_absolute_device['Total_Crashes'],
color=colors_abs, alpha=0.8, edgecolor='black')
axes[0].set_yticks(range(len(top_absolute_device)))
axes[0].set_yticklabels(top_absolute_device['Device'], fontsize=9)
axes[0].set_xlabel('Total Crashes', fontsize=12, fontweight='bold')
axes[0].set_title('Top 15 Devices: Highest Absolute Crashes', fontsize=13, fontweight='bold')
axes[0].invert_yaxis()
axes[0].grid(True, alpha=0.3, axis='x')
for i, v in enumerate(top_absolute_device['Total_Crashes']):
axes[0].text(v + (v*0.02), i, f'{v:,.0f}', va='center', fontsize=8, fontweight='bold')
# Relative crash rate
colors_rel = ['#e74c3c' if i < 5 else '#f39c12' for i in range(len(top_relative_device))]
bars2 = axes[1].barh(range(len(top_relative_device)), top_relative_device['Crash_Rate'],
color=colors_rel, alpha=0.8, edgecolor='black')
axes[1].set_yticks(range(len(top_relative_device)))
axes[1].set_yticklabels(top_relative_device['Device'], fontsize=9)
axes[1].set_xlabel('Crash Rate (%)', fontsize=12, fontweight='bold')
axes[1].set_title('Top 15 Devices: Highest Relative Crash Rate', fontsize=13, fontweight='bold')
axes[1].invert_yaxis()
axes[1].grid(True, alpha=0.3, axis='x')
for i, v in enumerate(top_relative_device['Crash_Rate']):
axes[1].text(v + (v*0.02), i, f'{v:.1f}%', va='center', fontsize=8, fontweight='bold')
plt.tight_layout()
plt.savefig('batch3_top_offending_devices.png', dpi=300, bbox_inches='tight')
plt.show()
print("✓ Chart saved: batch3_top_offending_devices.png")
✓ Chart saved: batch3_top_offending_os.png
✓ Chart saved: batch3_top_offending_devices.png
Data-forensic analyses (how to use Crashes, ANRs, Device type, OS version)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
import warnings
warnings.filterwarnings('ignore')
# Load the data
df = pd.read_csv(r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\Combined_App_Performance.csv")
# Data preprocessing
df.columns = df.columns.str.strip()
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
df['Daily Crashes'] = pd.to_numeric(df['Daily Crashes'], errors='coerce')
df['Daily ANRs'] = pd.to_numeric(df['Daily ANRs'], errors='coerce')
df = df.dropna(subset=['Date', 'Daily Crashes', 'Daily ANRs'])
print("="*70)
print("B. ANOMALY DETECTION & STATISTICAL ANALYSIS")
print("="*70)
# 1. GROUP COMPARISON: Crash and ANR rates by Device Type and OS Version
print("\n1. GROUP COMPARISON ANALYSIS")
print("-" * 70)
device_col = 'Device Type' if 'Device Type' in df.columns else 'Device Model'
if device_col in df.columns:
# By Device Type
device_comparison = df.groupby(device_col).agg({
'Daily Crashes': ['sum', 'mean', 'std'],
'Daily ANRs': ['sum', 'mean', 'std'],
device_col: 'count'
}).reset_index()
device_comparison.columns = [device_col, 'Total_Crashes', 'Avg_Crashes', 'Std_Crashes',
'Total_ANRs', 'Avg_ANRs', 'Std_ANRs', 'Sample_Size']
device_comparison['Crash_Rate'] = (device_comparison['Total_Crashes'] /
device_comparison['Sample_Size']) * 100
device_comparison['ANR_Rate'] = (device_comparison['Total_ANRs'] /
device_comparison['Sample_Size']) * 100
print("\n📊 Device Type Comparison:")
print(device_comparison[[device_col, 'Sample_Size', 'Avg_Crashes', 'Avg_ANRs',
'Crash_Rate', 'ANR_Rate']].to_string(index=False))
# By OS Version
os_comparison = df.groupby('Android OS Version').agg({
'Daily Crashes': ['sum', 'mean', 'std'],
'Daily ANRs': ['sum', 'mean', 'std'],
'Android OS Version': 'count'
}).reset_index()
os_comparison.columns = ['OS_Version', 'Total_Crashes', 'Avg_Crashes', 'Std_Crashes',
'Total_ANRs', 'Avg_ANRs', 'Std_ANRs', 'Sample_Size']
os_comparison['Crash_Rate'] = (os_comparison['Total_Crashes'] /
os_comparison['Sample_Size']) * 100
os_comparison['ANR_Rate'] = (os_comparison['Total_ANRs'] /
os_comparison['Sample_Size']) * 100
print("\n📊 OS Version Comparison:")
print(os_comparison[['OS_Version', 'Sample_Size', 'Avg_Crashes', 'Avg_ANRs',
'Crash_Rate', 'ANR_Rate']].to_string(index=False))
# Visualization - Group Comparison
fig, axes = plt.subplots(2, 2, figsize=(16, 10))
# Device Type - Crashes
if device_col in df.columns:
axes[0, 0].bar(device_comparison[device_col], device_comparison['Avg_Crashes'],
color='#e74c3c', alpha=0.7)
axes[0, 0].set_title('Average Crashes by Device Type', fontsize=12, fontweight='bold')
axes[0, 0].set_ylabel('Avg Crashes')
axes[0, 0].tick_params(axis='x', rotation=45)
axes[0, 0].grid(True, alpha=0.3, axis='y')
# Device Type - ANRs
axes[0, 1].bar(device_comparison[device_col], device_comparison['Avg_ANRs'],
color='#f39c12', alpha=0.7)
axes[0, 1].set_title('Average ANRs by Device Type', fontsize=12, fontweight='bold')
axes[0, 1].set_ylabel('Avg ANRs')
axes[0, 1].tick_params(axis='x', rotation=45)
axes[0, 1].grid(True, alpha=0.3, axis='y')
# OS Version - Crashes
axes[1, 0].bar(os_comparison['OS_Version'], os_comparison['Avg_Crashes'],
color='#3498db', alpha=0.7)
axes[1, 0].set_title('Average Crashes by OS Version', fontsize=12, fontweight='bold')
axes[1, 0].set_ylabel('Avg Crashes')
axes[1, 0].tick_params(axis='x', rotation=45)
axes[1, 0].grid(True, alpha=0.3, axis='y')
# OS Version - ANRs
axes[1, 1].bar(os_comparison['OS_Version'], os_comparison['Avg_ANRs'],
color='#9b59b6', alpha=0.7)
axes[1, 1].set_title('Average ANRs by OS Version', fontsize=12, fontweight='bold')
axes[1, 1].set_ylabel('Avg ANRs')
axes[1, 1].tick_params(axis='x', rotation=45)
axes[1, 1].grid(True, alpha=0.3, axis='y')
plt.tight_layout()
plt.savefig('group_comparison.png', dpi=300, bbox_inches='tight')
print("\n✓ Chart saved: group_comparison.png")
# 2. CHI-SQUARE TEST: Association between Crash occurrence and Device Type
print("\n\n2. CHI-SQUARE TEST (ASSOCIATION ANALYSIS)")
print("-" * 70)
# Create binary crash indicator
df['Has_Crash'] = (df['Daily Crashes'] > 0).astype(int)
if device_col in df.columns:
# Contingency table
contingency_table = pd.crosstab(df[device_col], df['Has_Crash'],
margins=True, margins_name='Total')
print("\n📋 Contingency Table: Crash Occurrence vs Device Type")
print(contingency_table)
# Chi-square test
contingency_for_test = pd.crosstab(df[device_col], df['Has_Crash'])
chi2, p_value, dof, expected = stats.chi2_contingency(contingency_for_test)
print("\n🔬 Chi-Square Test Results:")
print(f" Chi-Square Statistic: {chi2:.4f}")
print(f" P-value: {p_value:.6f}")
print(f" Degrees of Freedom: {dof}")
alpha = 0.05
if p_value < alpha:
print(f"\n✓ Result: SIGNIFICANT association detected (p < {alpha})")
print(f" There IS a statistically significant relationship between")
print(f" Device Type and Crash occurrence.")
else:
print(f"\n✗ Result: NO significant association (p >= {alpha})")
print(f" Device Type and Crash occurrence are independent.")
# Effect size (Cramér's V)
n = contingency_for_test.sum().sum()
cramers_v = np.sqrt(chi2 / (n * (min(contingency_for_test.shape) - 1)))
print(f" Cramér's V (effect size): {cramers_v:.4f}")
if cramers_v < 0.1:
effect = "negligible"
elif cramers_v < 0.3:
effect = "small"
elif cramers_v < 0.5:
effect = "medium"
else:
effect = "large"
print(f" Effect size interpretation: {effect}")
# 3. LOGISTIC REGRESSION: Predict crash probability
print("\n\n3. LOGISTIC REGRESSION ANALYSIS")
print("-" * 70)
# Prepare features
df['Hour'] = df['Date'].dt.hour
df['DayOfWeek'] = df['Date'].dt.dayofweek
df['Month'] = df['Date'].dt.month
# Encode categorical variables
le_device = LabelEncoder()
le_os = LabelEncoder()
if device_col in df.columns:
df['Device_Encoded'] = le_device.fit_transform(df[device_col].astype(str))
df['OS_Encoded'] = le_os.fit_transform(df['Android OS Version'].astype(str))
# Select features
feature_cols = ['OS_Encoded', 'Hour', 'Month', 'DayOfWeek']
if device_col in df.columns:
feature_cols.insert(0, 'Device_Encoded')
# Handle missing values
df_model = df[feature_cols + ['Has_Crash']].dropna()
X = df_model[feature_cols]
y = df_model['Has_Crash']
# Train logistic regression
log_reg = LogisticRegression(max_iter=1000, random_state=42)
log_reg.fit(X, y)
# Results
print("\n📈 Logistic Regression Model Performance:")
print(f" Model Accuracy: {log_reg.score(X, y):.4f}")
print(f" Baseline (all no-crash): {1 - y.mean():.4f}")
# Coefficients and odds ratios
coef_df = pd.DataFrame({
'Feature': feature_cols,
'Coefficient': log_reg.coef_[0],
'Odds_Ratio': np.exp(log_reg.coef_[0])
})
coef_df = coef_df.sort_values('Coefficient', ascending=False)
print("\n📊 Feature Coefficients & Odds Ratios:")
print(coef_df.to_string(index=False))
print("\n🔍 Interpretation:")
for idx, row in coef_df.iterrows():
if row['Coefficient'] > 0.1:
pct_increase = (row['Odds_Ratio'] - 1) * 100
print(f" • {row['Feature']}: INCREASES crash odds by {pct_increase:.1f}%")
elif row['Coefficient'] < -0.1:
pct_decrease = (1 - row['Odds_Ratio']) * 100
print(f" • {row['Feature']}: DECREASES crash odds by {pct_decrease:.1f}%")
# Visualization - Odds Ratios
fig, ax = plt.subplots(figsize=(10, 6))
colors = ['#e74c3c' if x > 1 else '#27ae60' for x in coef_df['Odds_Ratio']]
bars = ax.barh(coef_df['Feature'], coef_df['Odds_Ratio'], color=colors, alpha=0.7)
ax.axvline(x=1, color='black', linestyle='--', linewidth=2, label='No Effect')
ax.set_xlabel('Odds Ratio', fontsize=12, fontweight='bold')
ax.set_title('Logistic Regression: Feature Impact on Crash Probability',
fontsize=13, fontweight='bold')
ax.set_xlim(0.5, max(coef_df['Odds_Ratio'].max() * 1.1, 1.5))
ax.grid(True, alpha=0.3, axis='x')
ax.legend()
plt.tight_layout()
plt.savefig('logistic_regression_odds.png', dpi=300, bbox_inches='tight')
print("\n✓ Chart saved: logistic_regression_odds.png")
# 4. ANOMALY DETECTION (Z-Score Method)
print("\n\n4. ANOMALY DETECTION")
print("-" * 70)
# Calculate z-scores for crashes
df['Crash_ZScore'] = np.abs(stats.zscore(df['Daily Crashes']))
df['ANR_ZScore'] = np.abs(stats.zscore(df['Daily ANRs']))
# Identify anomalies (z-score > 3)
crash_anomalies = df[df['Crash_ZScore'] > 3].sort_values('Daily Crashes', ascending=False)
anr_anomalies = df[df['ANR_ZScore'] > 3].sort_values('Daily ANRs', ascending=False)
print(f"\n⚠ Crash Anomalies Detected: {len(crash_anomalies)}")
if len(crash_anomalies) > 0:
print("\nTop 5 Crash Anomalies:")
cols_to_show = ['Date', 'Daily Crashes', device_col if device_col in df.columns else 'Android OS Version',
'Android OS Version', 'Crash_ZScore']
cols_to_show = [c for c in cols_to_show if c in crash_anomalies.columns]
print(crash_anomalies[cols_to_show].head(5).to_string(index=False))
print(f"\n⚠ ANR Anomalies Detected: {len(anr_anomalies)}")
if len(anr_anomalies) > 0:
print("\nTop 5 ANR Anomalies:")
cols_to_show = ['Date', 'Daily ANRs', device_col if device_col in df.columns else 'Android OS Version',
'Android OS Version', 'ANR_ZScore']
cols_to_show = [c for c in cols_to_show if c in anr_anomalies.columns]
print(anr_anomalies[cols_to_show].head(5).to_string(index=False))
# Visualization - Anomalies
fig, axes = plt.subplots(2, 1, figsize=(14, 8))
# Crash anomalies
axes[0].scatter(df['Date'], df['Daily Crashes'], alpha=0.5, s=30, c='#3498db', label='Normal')
if len(crash_anomalies) > 0:
axes[0].scatter(crash_anomalies['Date'], crash_anomalies['Daily Crashes'],
color='#e74c3c', s=100, marker='X', label='Anomaly', zorder=5)
axes[0].set_title('Crash Anomaly Detection (Z-Score > 3)', fontsize=13, fontweight='bold')
axes[0].set_ylabel('Daily Crashes')
axes[0].legend()
axes[0].grid(True, alpha=0.3)
# ANR anomalies
axes[1].scatter(df['Date'], df['Daily ANRs'], alpha=0.5, s=30, c='#9b59b6', label='Normal')
if len(anr_anomalies) > 0:
axes[1].scatter(anr_anomalies['Date'], anr_anomalies['Daily ANRs'],
color='#e74c3c', s=100, marker='X', label='Anomaly', zorder=5)
axes[1].set_title('ANR Anomaly Detection (Z-Score > 3)', fontsize=13, fontweight='bold')
axes[1].set_xlabel('Date')
axes[1].set_ylabel('Daily ANRs')
axes[1].legend()
axes[1].grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig('anomaly_detection.png', dpi=300, bbox_inches='tight')
print("\n✓ Chart saved: anomaly_detection.png")
print("\n" + "="*70)
print("ANOMALY DETECTION & STATISTICAL ANALYSIS COMPLETE")
print("="*70)
======================================================================
B. ANOMALY DETECTION & STATISTICAL ANALYSIS
======================================================================
1. GROUP COMPARISON ANALYSIS
----------------------------------------------------------------------
📊 OS Version Comparison:
OS_Version Sample_Size Avg_Crashes Avg_ANRs Crash_Rate ANR_Rate
Android 10 28 6.964286 11.785714 696.428571 1178.571429
Android 11 28 8.500000 13.392857 850.000000 1339.285714
Android 12 28 10.357143 20.071429 1035.714286 2007.142857
Android 12L 2 0.000000 1.000000 0.000000 100.000000
Android 13 28 13.607143 11.607143 1360.714286 1160.714286
Android 14 28 38.428571 149.142857 3842.857143 14914.285714
Android 15 28 1.571429 6.428571 157.142857 642.857143
Android 6.0 21 0.761905 2.285714 76.190476 228.571429
Android 7.0 17 0.352941 2.058824 35.294118 205.882353
Android 7.1 23 4.043478 2.782609 404.347826 278.260870
Android 8.0 22 2.227273 0.954545 222.727273 95.454545
Android 8.1 28 4.821429 8.678571 482.142857 867.857143
Android 9 28 24.250000 6.500000 2425.000000 650.000000
✓ Chart saved: group_comparison.png
2. CHI-SQUARE TEST (ASSOCIATION ANALYSIS)
----------------------------------------------------------------------
3. LOGISTIC REGRESSION ANALYSIS
----------------------------------------------------------------------
📈 Logistic Regression Model Performance:
Model Accuracy: 0.6609
Baseline (all no-crash): 0.3391
📊 Feature Coefficients & Odds Ratios:
Feature Coefficient Odds_Ratio
DayOfWeek 0.045513 1.046564
Hour 0.000000 1.000000
Month -0.084126 0.919316
OS_Encoded -0.130480 0.877674
🔍 Interpretation:
• OS_Encoded: DECREASES crash odds by 12.2%
✓ Chart saved: logistic_regression_odds.png
4. ANOMALY DETECTION
----------------------------------------------------------------------
⚠ Crash Anomalies Detected: 66
Top 5 Crash Anomalies:
Date Daily Crashes Android OS Version Android OS Version Crash_ZScore
2025-02-21 77.0 Android 14 Android 14 14.895559
2025-02-22 76.0 Android 14 Android 14 14.696837
2025-02-23 72.0 Android 14 Android 14 13.901947
2025-02-24 62.0 Android 14 Android 14 11.914724
2025-02-19 54.0 Android 14 Android 14 10.324946
⚠ ANR Anomalies Detected: 28
Top 5 ANR Anomalies:
Date Daily ANRs Android OS Version Android OS Version ANR_ZScore
2025-02-24 185.0 Android 14 Android 14 13.194475
2025-02-22 181.0 Android 14 Android 14 12.904970
2025-02-13 171.0 Android 14 Android 14 12.181208
2025-02-25 165.0 Android 14 Android 14 11.746951
2025-02-23 160.0 Android 14 Android 14 11.385070
✓ Chart saved: anomaly_detection.png
======================================================================
ANOMALY DETECTION & STATISTICAL ANALYSIS COMPLETE
======================================================================
NDPR Compliance:DPIA and Breach Evidence Analysis
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import warnings
warnings.filterwarnings('ignore')
# Set style
sns.set_style("whitegrid")
plt.rcParams['figure.dpi'] = 300
print("="*80)
print("NDPR/PRIVACY COMPLIANCE ANALYSIS - BATCH 3")
print("DPIA (DATA PROTECTION IMPACT ASSESSMENT) & BREACH EVIDENCE")
print("="*80)
# Load the data
df = pd.read_csv(r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\Combined_App_Performance.csv")
df.columns = df.columns.str.strip()
# Find columns
date_col = [col for col in df.columns if 'date' in col.lower()][0]
device_col = [col for col in df.columns if 'device' in col.lower()][0] if any('device' in col.lower() for col in df.columns) else None
# Keep only required columns
columns_to_keep = ['Daily ANRs', 'Android OS Version', 'Daily Crashes', date_col]
if device_col:
columns_to_keep.append(device_col)
df = df[columns_to_keep].copy()
# Clean data
df[date_col] = pd.to_datetime(df[date_col], errors='coerce')
df = df.rename(columns={date_col: 'Date'})
df['Daily Crashes'] = pd.to_numeric(df['Daily Crashes'], errors='coerce')
df['Daily ANRs'] = pd.to_numeric(df['Daily ANRs'], errors='coerce')
df = df.dropna(subset=['Date', 'Daily Crashes', 'Daily ANRs'])
if device_col:
df = df.rename(columns={device_col: 'Device'})
print(f"\nDataset loaded: {len(df):,} records")
print(f"Date range: {df['Date'].min()} to {df['Date'].max()}")
# ===========================================================================
# 1. DATA PROTECTION IMPACT ASSESSMENT (DPIA)
# ===========================================================================
print("\n" + "="*80)
print("1. DATA PROTECTION IMPACT ASSESSMENT (DPIA)")
print("="*80)
print(f"\n📋 DPIA FRAMEWORK - NDPR Article 25 Compliance")
print(f"\n A DPIA is REQUIRED when processing:")
print(f" • Involves systematic monitoring")
print(f" • Processes personal data at large scale")
print(f" • Uses automated decision-making")
print(f" • Processes sensitive or criminal data")
# Risk assessment based on data characteristics
risk_score = 0
risk_factors = []
print(f"\n🔍 RISK ASSESSMENT:")
# Factor 1: Volume of data
total_records = len(df)
if total_records > 10000:
risk_score += 3
risk_factors.append("High volume of records (>10,000)")
print(f" ⚠️ HIGH VOLUME: {total_records:,} records (Risk +3)")
elif total_records > 1000:
risk_score += 2
risk_factors.append("Medium volume of records (>1,000)")
print(f" ⚠️ MEDIUM VOLUME: {total_records:,} records (Risk +2)")
else:
risk_score += 1
print(f" ✓ LOW VOLUME: {total_records:,} records (Risk +1)")
# Factor 2: Data retention period
data_age_max = (datetime.now() - df['Date'].min()).days
if data_age_max > 365:
risk_score += 3
risk_factors.append(f"Extended retention period ({data_age_max} days)")
print(f" ⚠️ EXTENDED RETENTION: {data_age_max} days (Risk +3)")
elif data_age_max > 180:
risk_score += 2
print(f" ⚠️ MODERATE RETENTION: {data_age_max} days (Risk +2)")
else:
risk_score += 1
print(f" ✓ COMPLIANT RETENTION: {data_age_max} days (Risk +1)")
# Factor 3: Device identification
if 'Device' in df.columns:
unique_devices = df['Device'].nunique()
if unique_devices > 1000:
risk_score += 2
risk_factors.append(f"Large user base ({unique_devices} devices)")
print(f" ⚠️ LARGE USER BASE: {unique_devices:,} devices (Risk +2)")
else:
risk_score += 1
print(f" ✓ MODERATE USER BASE: {unique_devices:,} devices (Risk +1)")
# Factor 4: OS version diversity
unique_os = df['Android OS Version'].nunique()
if unique_os > 10:
risk_score += 1
print(f" ⚠️ HIGH OS FRAGMENTATION: {unique_os} versions (Risk +1)")
else:
print(f" ✓ LOW OS FRAGMENTATION: {unique_os} versions")
# Factor 5: Presence of crash data
total_crashes = df['Daily Crashes'].sum()
total_anrs = df['Daily ANRs'].sum()
if total_crashes > 0 or total_anrs > 0:
risk_score += 1
risk_factors.append(f"Contains performance incidents ({total_crashes} crashes, {total_anrs} ANRs)")
print(f" ⚠️ INCIDENT DATA: {total_crashes:,} crashes, {total_anrs:,} ANRs (Risk +1)")
# Calculate overall risk level
max_possible_risk = 11 # Maximum points from all factors
risk_percentage = (risk_score / max_possible_risk) * 100
print(f"\n📊 OVERALL DPIA RISK ASSESSMENT:")
print(f" Total Risk Score: {risk_score}/{max_possible_risk} ({risk_percentage:.1f}%)")
if risk_percentage >= 70:
risk_level = "HIGH RISK"
risk_color = "🔴"
elif risk_percentage >= 40:
risk_level = "MEDIUM RISK"
risk_color = "🟡"
else:
risk_level = "LOW RISK"
risk_color = "🟢"
print(f" Risk Level: {risk_color} {risk_level}")
if risk_level == "HIGH RISK":
print(f"\n 🔴 ACTION REQUIRED:")
print(f" • Complete formal DPIA documentation")
print(f" • Consult with Data Protection Officer (DPO)")
print(f" • Implement additional safeguards")
print(f" • Consider privacy-by-design improvements")
elif risk_level == "MEDIUM RISK":
print(f"\n 🟡 RECOMMENDATION:")
print(f" • Document risk assessment findings")
print(f" • Review data retention policies")
print(f" • Enhance security measures")
print(f"\n📋 KEY RISK FACTORS IDENTIFIED:")
for i, factor in enumerate(risk_factors, 1):
print(f" {i}. {factor}")
# ===========================================================================
# 2. DPIA COMPLIANCE CHECKLIST
# ===========================================================================
print("\n\n" + "="*80)
print("2. DPIA COMPLIANCE CHECKLIST")
print("="*80)
dpia_checklist = [
("Data Processing Necessity", "Is crash/ANR monitoring necessary for app functionality?"),
("Legitimate Interest", "Does monitoring serve legitimate business interest?"),
("User Consent", "Have users been informed and given consent?"),
("Purpose Limitation", "Is data used only for stated purpose?"),
("Data Minimization", "Are we collecting only necessary data?"),
("Storage Limitation", "Is retention period justified and documented?"),
("Security Measures", "Are appropriate technical measures in place?"),
("Rights Management", "Can users exercise their data rights (access, deletion)?"),
("Processor Agreements", "Are third-party processors under contract?"),
("Breach Response", "Is incident response plan documented?")
]
print(f"\n✓ Complete the following DPIA requirements:\n")
for i, (requirement, question) in enumerate(dpia_checklist, 1):
print(f" [{' '}] {i}. {requirement}")
print(f" Q: {question}")
print()
# ===========================================================================
# 3. BREACH EVIDENCE ANALYSIS
# ===========================================================================
print("\n" + "="*80)
print("3. BREACH EVIDENCE ANALYSIS")
print("="*80)
print(f"\n🔍 ANALYZING DATA ACCESS PATTERNS FOR SUSPICIOUS ACTIVITY...")
# Analyze daily record patterns
daily_records = df.groupby('Date').size().reset_index(name='Record_Count')
daily_records = daily_records.sort_values('Date')
# Statistical anomaly detection
mean_count = daily_records['Record_Count'].mean()
std_count = daily_records['Record_Count'].std()
daily_records['Z_Score'] = (daily_records['Record_Count'] - mean_count) / std_count
daily_records['Is_Anomaly'] = abs(daily_records['Z_Score']) > 2
anomalies = daily_records[daily_records['Is_Anomaly']]
anomaly_count = len(anomalies)
print(f"\n📊 ACCESS PATTERN ANALYSIS:")
print(f" Mean daily records: {mean_count:.2f}")
print(f" Standard deviation: {std_count:.2f}")
print(f" Detection threshold: ±2 standard deviations")
print(f" Total days analyzed: {len(daily_records)}")
print(f" Anomalies detected: {anomaly_count}")
if anomaly_count > 0:
print(f"\n⚠️ SUSPICIOUS ACTIVITY DETECTED!")
print(f"\n Days with unusual access patterns:")
for idx, row in anomalies.iterrows():
date_str = row['Date'].strftime('%Y-%m-%d')
deviation = "HIGH" if row['Z_Score'] > 2 else "LOW"
print(f" • {date_str}: {row['Record_Count']} records (Z-score: {row['Z_Score']:.2f}, {deviation})")
print(f"\n 🔴 INVESTIGATION ACTIONS REQUIRED:")
print(f" 1. Review server access logs for these dates")
print(f" 2. Check for unauthorized data exports")
print(f" 3. Verify user authentication logs")
print(f" 4. Confirm business justification for spikes")
print(f" 5. Document findings in security incident log")
else:
print(f"\n ✅ NO ANOMALOUS ACCESS PATTERNS DETECTED")
print(f" Data access patterns appear normal and consistent")
# Analyze crash/ANR spikes
print(f"\n\n📊 INCIDENT SPIKE ANALYSIS:")
daily_crashes = df.groupby('Date')['Daily Crashes'].sum()
daily_anrs = df.groupby('Date')['Daily ANRs'].sum()
crash_mean = daily_crashes.mean()
crash_std = daily_crashes.std()
anr_mean = daily_anrs.mean()
anr_std = daily_anrs.std()
crash_spikes = daily_crashes[daily_crashes > (crash_mean + 2 * crash_std)]
anr_spikes = daily_anrs[daily_anrs > (anr_mean + 2 * anr_std)]
print(f" Crash spikes detected: {len(crash_spikes)}")
print(f" ANR spikes detected: {len(anr_spikes)}")
if len(crash_spikes) > 0:
print(f"\n ⚠️ CRASH SPIKES ON:")
for date, value in crash_spikes.items():
print(f" • {date.strftime('%Y-%m-%d')}: {value:.0f} crashes ({((value/crash_mean)-1)*100:.1f}% above normal)")
if len(anr_spikes) > 0:
print(f"\n ⚠️ ANR SPIKES ON:")
for date, value in anr_spikes.items():
print(f" • {date.strftime('%Y-%m-%d')}: {value:.0f} ANRs ({((value/anr_mean)-1)*100:.1f}% above normal)")
# ===========================================================================
# 4. BREACH NOTIFICATION REQUIREMENTS
# ===========================================================================
print("\n\n" + "="*80)
print("4. BREACH NOTIFICATION REQUIREMENTS (NDPR Article 33)")
print("="*80)
print(f"\n⚠️ IF A DATA BREACH IS CONFIRMED:")
print(f"\n Timeline Requirements:")
print(f" • Notify NITDA within 72 hours of awareness")
print(f" • Document breach details and response actions")
print(f" • Notify affected users if high risk to their rights")
print(f"\n Breach Documentation Must Include:")
print(f" 1. Nature of the breach (unauthorized access, exfiltration, etc.)")
print(f" 2. Categories and approximate number of data subjects affected")
print(f" 3. Categories and approximate number of records affected")
print(f" 4. Likely consequences of the breach")
print(f" 5. Measures taken to address the breach")
print(f" 6. Measures to mitigate possible adverse effects")
print(f"\n Contact Information:")
print(f" • NITDA: info@nitda.gov.ng")
print(f" • Website: https://nitda.gov.ng")
# ===========================================================================
# 5. INCIDENT RESPONSE CHECKLIST
# ===========================================================================
print("\n\n" + "="*80)
print("5. SECURITY INCIDENT RESPONSE CHECKLIST")
print("="*80)
incident_checklist = [
"Isolate affected systems immediately",
"Preserve evidence (logs, access records, timestamps)",
"Notify Data Protection Officer (DPO)",
"Assemble incident response team",
"Conduct forensic analysis of breach scope",
"Identify compromised data and affected users",
"Assess risk to individuals' rights and freedoms",
"Document all actions taken with timestamps",
"Prepare breach notification for NITDA",
"Notify affected users if required",
"Implement remediation measures",
"Conduct post-incident review",
"Update security policies and procedures"
]
print(f"\n📋 IMMEDIATE ACTIONS IF BREACH SUSPECTED:\n")
for i, action in enumerate(incident_checklist, 1):
print(f" [{' '}] {i}. {action}")
# ===========================================================================
# 6. AUDIT TRAIL REQUIREMENTS
# ===========================================================================
print("\n\n" + "="*80)
print("6. AUDIT TRAIL & LOGGING REQUIREMENTS")
print("="*80)
print(f"\n📝 REQUIRED AUDIT LOGS (must be maintained):")
print(f"\n Access Logs:")
print(f" • User ID accessing the data")
print(f" • Timestamp of access")
print(f" • IP address of accessing device")
print(f" • Type of operation (read, write, delete, export)")
print(f" • Records accessed or modified")
print(f"\n Security Event Logs:")
print(f" • Failed authentication attempts")
print(f" • Privilege escalation events")
print(f" • Configuration changes")
print(f" • Data export activities")
print(f" • Unusual query patterns")
print(f"\n Retention:")
print(f" • Audit logs must be retained for minimum 1 year")
print(f" • Logs must be tamper-proof and encrypted")
print(f" • Regular review by security team required")
# Generate summary report
print("\n\n" + "="*80)
print("BATCH 3 SUMMARY REPORT")
print("="*80)
print(f"\n📊 DPIA RISK ASSESSMENT:")
print(f" Overall Risk Level: {risk_level} ({risk_percentage:.1f}%)")
print(f" Risk Factors: {len(risk_factors)}")
print(f"\n🔍 BREACH EVIDENCE:")
print(f" Anomalous Days: {anomaly_count}")
print(f" Crash Spikes: {len(crash_spikes)}")
print(f" ANR Spikes: {len(anr_spikes)}")
if anomaly_count > 0 or len(crash_spikes) > 0 or len(anr_spikes) > 0:
print(f"\n ⚠️ STATUS: INVESTIGATION REQUIRED")
else:
print(f"\n ✅ STATUS: NO IMMEDIATE CONCERNS")
print(f"\n📋 COMPLIANCE STATUS:")
print(f" DPIA Required: {'YES' if risk_level in ['HIGH RISK', 'MEDIUM RISK'] else 'RECOMMENDED'}")
print(f" NITDA Registration: VERIFY MANUALLY")
print(f" Data Protection Officer: VERIFY APPOINTED")
print("\n" + "="*80)
print("BATCH 3 COMPLETE - DPIA & BREACH EVIDENCE ANALYSIS")
print("="*80)
================================================================================
NDPR/PRIVACY COMPLIANCE ANALYSIS - BATCH 3
DPIA (DATA PROTECTION IMPACT ASSESSMENT) & BREACH EVIDENCE
================================================================================
Dataset loaded: 3,391 records
Date range: 2024-07-05 00:00:00 to 2025-05-11 00:00:00
================================================================================
1. DATA PROTECTION IMPACT ASSESSMENT (DPIA)
================================================================================
📋 DPIA FRAMEWORK - NDPR Article 25 Compliance
A DPIA is REQUIRED when processing:
• Involves systematic monitoring
• Processes personal data at large scale
• Uses automated decision-making
• Processes sensitive or criminal data
🔍 RISK ASSESSMENT:
⚠️ MEDIUM VOLUME: 3,391 records (Risk +2)
⚠️ EXTENDED RETENTION: 470 days (Risk +3)
✓ MODERATE USER BASE: 782 devices (Risk +1)
⚠️ HIGH OS FRAGMENTATION: 13 versions (Risk +1)
⚠️ INCIDENT DATA: 6,929.0 crashes, 9,142.0 ANRs (Risk +1)
📊 OVERALL DPIA RISK ASSESSMENT:
Total Risk Score: 8/11 (72.7%)
Risk Level: 🔴 HIGH RISK
🔴 ACTION REQUIRED:
• Complete formal DPIA documentation
• Consult with Data Protection Officer (DPO)
• Implement additional safeguards
• Consider privacy-by-design improvements
📋 KEY RISK FACTORS IDENTIFIED:
1. Medium volume of records (>1,000)
2. Extended retention period (470 days)
3. Contains performance incidents (6929.0 crashes, 9142.0 ANRs)
================================================================================
2. DPIA COMPLIANCE CHECKLIST
================================================================================
✓ Complete the following DPIA requirements:
[ ] 1. Data Processing Necessity
Q: Is crash/ANR monitoring necessary for app functionality?
[ ] 2. Legitimate Interest
Q: Does monitoring serve legitimate business interest?
[ ] 3. User Consent
Q: Have users been informed and given consent?
[ ] 4. Purpose Limitation
Q: Is data used only for stated purpose?
[ ] 5. Data Minimization
Q: Are we collecting only necessary data?
[ ] 6. Storage Limitation
Q: Is retention period justified and documented?
[ ] 7. Security Measures
Q: Are appropriate technical measures in place?
[ ] 8. Rights Management
Q: Can users exercise their data rights (access, deletion)?
[ ] 9. Processor Agreements
Q: Are third-party processors under contract?
[ ] 10. Breach Response
Q: Is incident response plan documented?
================================================================================
3. BREACH EVIDENCE ANALYSIS
================================================================================
🔍 ANALYZING DATA ACCESS PATTERNS FOR SUSPICIOUS ACTIVITY...
📊 ACCESS PATTERN ANALYSIS:
Mean daily records: 56.52
Standard deviation: 76.69
Detection threshold: ±2 standard deviations
Total days analyzed: 60
Anomalies detected: 3
⚠️ SUSPICIOUS ACTIVITY DETECTED!
Days with unusual access patterns:
• 2024-07-24: 211 records (Z-score: 2.01, HIGH)
• 2024-07-25: 237 records (Z-score: 2.35, HIGH)
• 2024-07-26: 214 records (Z-score: 2.05, HIGH)
🔴 INVESTIGATION ACTIONS REQUIRED:
1. Review server access logs for these dates
2. Check for unauthorized data exports
3. Verify user authentication logs
4. Confirm business justification for spikes
5. Document findings in security incident log
📊 INCIDENT SPIKE ANALYSIS:
Crash spikes detected: 1
ANR spikes detected: 1
⚠️ CRASH SPIKES ON:
• 2024-07-25: 386 crashes (234.2% above normal)
⚠️ ANR SPIKES ON:
• 2025-02-22: 368 ANRs (141.5% above normal)
================================================================================
4. BREACH NOTIFICATION REQUIREMENTS (NDPR Article 33)
================================================================================
⚠️ IF A DATA BREACH IS CONFIRMED:
Timeline Requirements:
• Notify NITDA within 72 hours of awareness
• Document breach details and response actions
• Notify affected users if high risk to their rights
Breach Documentation Must Include:
1. Nature of the breach (unauthorized access, exfiltration, etc.)
2. Categories and approximate number of data subjects affected
3. Categories and approximate number of records affected
4. Likely consequences of the breach
5. Measures taken to address the breach
6. Measures to mitigate possible adverse effects
Contact Information:
• NITDA: info@nitda.gov.ng
• Website: https://nitda.gov.ng
================================================================================
5. SECURITY INCIDENT RESPONSE CHECKLIST
================================================================================
📋 IMMEDIATE ACTIONS IF BREACH SUSPECTED:
[ ] 1. Isolate affected systems immediately
[ ] 2. Preserve evidence (logs, access records, timestamps)
[ ] 3. Notify Data Protection Officer (DPO)
[ ] 4. Assemble incident response team
[ ] 5. Conduct forensic analysis of breach scope
[ ] 6. Identify compromised data and affected users
[ ] 7. Assess risk to individuals' rights and freedoms
[ ] 8. Document all actions taken with timestamps
[ ] 9. Prepare breach notification for NITDA
[ ] 10. Notify affected users if required
[ ] 11. Implement remediation measures
[ ] 12. Conduct post-incident review
[ ] 13. Update security policies and procedures
================================================================================
6. AUDIT TRAIL & LOGGING REQUIREMENTS
================================================================================
📝 REQUIRED AUDIT LOGS (must be maintained):
Access Logs:
• User ID accessing the data
• Timestamp of access
• IP address of accessing device
• Type of operation (read, write, delete, export)
• Records accessed or modified
Security Event Logs:
• Failed authentication attempts
• Privilege escalation events
• Configuration changes
• Data export activities
• Unusual query patterns
Retention:
• Audit logs must be retained for minimum 1 year
• Logs must be tamper-proof and encrypted
• Regular review by security team required
================================================================================
BATCH 3 SUMMARY REPORT
================================================================================
📊 DPIA RISK ASSESSMENT:
Overall Risk Level: HIGH RISK (72.7%)
Risk Factors: 3
🔍 BREACH EVIDENCE:
Anomalous Days: 3
Crash Spikes: 1
ANR Spikes: 1
⚠️ STATUS: INVESTIGATION REQUIRED
📋 COMPLIANCE STATUS:
DPIA Required: YES
NITDA Registration: VERIFY MANUALLY
Data Protection Officer: VERIFY APPOINTED
================================================================================
BATCH 3 COMPLETE - DPIA & BREACH EVIDENCE ANALYSIS
================================================================================
PLOT CHARTS: NDPR Compliance:DPIA and Breach Evidence Analysis
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import warnings
warnings.filterwarnings('ignore')
# Set style
sns.set_style("whitegrid")
plt.rcParams['figure.dpi'] = 300
# Load and prepare data
df = pd.read_csv(r"C:\Users\hp\Desktop\WEEK 3_DSHub\Merged\ANR\Combined_App_Performance.csv")
df.columns = df.columns.str.strip()
# Find columns
date_col = [col for col in df.columns if 'date' in col.lower()][0]
device_col = [col for col in df.columns if 'device' in col.lower()][0] if any('device' in col.lower() for col in df.columns) else None
# Keep required columns
columns_to_keep = ['Daily ANRs', 'Android OS Version', 'Daily Crashes', date_col]
if device_col:
columns_to_keep.append(device_col)
df = df[columns_to_keep].copy()
# Clean data
df[date_col] = pd.to_datetime(df[date_col], errors='coerce')
df = df.rename(columns={date_col: 'Date'})
df['Daily Crashes'] = pd.to_numeric(df['Daily Crashes'], errors='coerce')
df['Daily ANRs'] = pd.to_numeric(df['Daily ANRs'], errors='coerce')
df = df.dropna(subset=['Date', 'Daily Crashes', 'Daily ANRs'])
if device_col:
df = df.rename(columns={device_col: 'Device'})
# ===========================================================================
# CHART 1: DPIA RISK ASSESSMENT VISUALIZATION
# ===========================================================================
# Calculate risk factors
risk_factors = []
risk_scores = []
risk_levels = []
# Volume Risk
total_records = len(df)
if total_records > 10000:
risk_factors.append('Data Volume')
risk_scores.append(3)
risk_levels.append('HIGH')
elif total_records > 1000:
risk_factors.append('Data Volume')
risk_scores.append(2)
risk_levels.append('MEDIUM')
else:
risk_factors.append('Data Volume')
risk_scores.append(1)
risk_levels.append('LOW')
# Retention Risk
data_age = (datetime.now() - df['Date'].min()).days
if data_age > 365:
risk_factors.append('Retention Period')
risk_scores.append(3)
risk_levels.append('HIGH')
elif data_age > 180:
risk_factors.append('Retention Period')
risk_scores.append(2)
risk_levels.append('MEDIUM')
else:
risk_factors.append('Retention Period')
risk_scores.append(1)
risk_levels.append('LOW')
# Device Identification Risk
if 'Device' in df.columns:
unique_devices = df['Device'].nunique()
if unique_devices > 1000:
risk_factors.append('User Base Size')
risk_scores.append(2)
risk_levels.append('MEDIUM')
else:
risk_factors.append('User Base Size')
risk_scores.append(1)
risk_levels.append('LOW')
# OS Version Diversity Risk
unique_os = df['Android OS Version'].nunique()
if unique_os > 10:
risk_factors.append('OS Fragmentation')
risk_scores.append(2)
risk_levels.append('MEDIUM')
else:
risk_factors.append('OS Fragmentation')
risk_scores.append(1)
risk_levels.append('LOW')
# Data Sensitivity Risk (based on crash/ANR presence)
has_crashes = (df['Daily Crashes'] > 0).any()
has_anrs = (df['Daily ANRs'] > 0).any()
if has_crashes and has_anrs:
risk_factors.append('Data Sensitivity')
risk_scores.append(2)
risk_levels.append('MEDIUM')
else:
risk_factors.append('Data Sensitivity')
risk_scores.append(1)
risk_levels.append('LOW')
# Create DPIA Risk Assessment Chart
fig, axes = plt.subplots(2, 2, figsize=(16, 10))
# Chart 1: Risk Score by Factor
colors = ['#e74c3c' if s == 3 else '#f39c12' if s == 2 else '#27ae60' for s in risk_scores]
bars = axes[0, 0].barh(risk_factors, risk_scores, color=colors, alpha=0.8, edgecolor='black')
axes[0, 0].set_xlabel('Risk Score', fontsize=12, fontweight='bold')
axes[0, 0].set_title('DPIA: Risk Assessment by Factor', fontsize=13, fontweight='bold')
axes[0, 0].set_xlim(0, 3.5)
axes[0, 0].grid(True, alpha=0.3, axis='x')
# Add value labels and risk levels
for i, (score, level) in enumerate(zip(risk_scores, risk_levels)):
axes[0, 0].text(score + 0.1, i, f'{score} - {level}', va='center',
fontsize=10, fontweight='bold')
# Add legend for risk levels
axes[0, 0].axvline(x=1, color='#27ae60', linestyle='--', alpha=0.5, linewidth=2)
axes[0, 0].axvline(x=2, color='#f39c12', linestyle='--', alpha=0.5, linewidth=2)
axes[0, 0].axvline(x=3, color='#e74c3c', linestyle='--', alpha=0.5, linewidth=2)
# Chart 2: Overall Risk Gauge
total_risk = sum(risk_scores)
max_possible_risk = len(risk_factors) * 3
risk_percentage = (total_risk / max_possible_risk) * 100
if risk_percentage >= 70:
gauge_color = '#e74c3c'
risk_category = 'HIGH RISK'
elif risk_percentage >= 40:
gauge_color = '#f39c12'
risk_category = 'MEDIUM RISK'
else:
gauge_color = '#27ae60'
risk_category = 'LOW RISK'
axes[0, 1].barh(['Overall DPIA Risk'], [risk_percentage], color=gauge_color,
alpha=0.8, edgecolor='black', height=0.5)
axes[0, 1].set_xlabel('Risk Percentage (%)', fontsize=12, fontweight='bold')
axes[0, 1].set_title(f'Overall DPIA Risk Level: {risk_category}',
fontsize=13, fontweight='bold')
axes[0, 1].set_xlim(0, 100)
axes[0, 1].text(risk_percentage + 2, 0, f'{risk_percentage:.1f}%',
va='center', fontsize=14, fontweight='bold')
axes[0, 1].axvline(x=40, color='#f39c12', linestyle='--', alpha=0.5, linewidth=2)
axes[0, 1].axvline(x=70, color='#e74c3c', linestyle='--', alpha=0.5, linewidth=2)
axes[0, 1].grid(True, alpha=0.3, axis='x')
# ===========================================================================
# CHART 3: BREACH EVIDENCE - UNUSUAL ACTIVITY DETECTION
# ===========================================================================
# Analyze daily record patterns for anomalies
daily_records = df.groupby('Date').size().reset_index(name='Record_Count')
daily_records = daily_records.sort_values('Date')
# Calculate z-scores for anomaly detection
mean_count = daily_records['Record_Count'].mean()
std_count = daily_records['Record_Count'].std()
daily_records['Z_Score'] = (daily_records['Record_Count'] - mean_count) / std_count
daily_records['Is_Anomaly'] = abs(daily_records['Z_Score']) > 2
# Chart 3: Daily Activity Pattern with Anomalies
normal_data = daily_records[~daily_records['Is_Anomaly']]
anomaly_data = daily_records[daily_records['Is_Anomaly']]
axes[1, 0].plot(normal_data['Date'], normal_data['Record_Count'],
marker='o', linewidth=2, markersize=4, color='#3498db',
label='Normal Activity')
if len(anomaly_data) > 0:
axes[1, 0].scatter(anomaly_data['Date'], anomaly_data['Record_Count'],
color='#e74c3c', s=200, marker='X',
label='Suspicious Activity', zorder=5, edgecolor='black', linewidth=2)
# Add threshold lines
upper_threshold = mean_count + 2 * std_count
lower_threshold = max(0, mean_count - 2 * std_count)
axes[1, 0].axhline(y=upper_threshold, color='red', linestyle='--',
linewidth=2, alpha=0.6, label=f'Upper Threshold: {upper_threshold:.0f}')
axes[1, 0].axhline(y=mean_count, color='green', linestyle='-',
linewidth=2, alpha=0.6, label=f'Mean: {mean_count:.0f}')
axes[1, 0].set_xlabel('Date', fontsize=12, fontweight='bold')
axes[1, 0].set_ylabel('Daily Record Count', fontsize=12, fontweight='bold')
axes[1, 0].set_title('Breach Detection: Unusual Data Access Patterns',
fontsize=13, fontweight='bold')
axes[1, 0].legend(loc='best', fontsize=9)
axes[1, 0].grid(True, alpha=0.3)
axes[1, 0].tick_params(axis='x', rotation=45)
# Chart 4: Anomaly Summary
anomaly_count = daily_records['Is_Anomaly'].sum()
normal_count = (~daily_records['Is_Anomaly']).sum()
categories = ['Normal\nActivity', 'Suspicious\nActivity']
counts = [normal_count, anomaly_count]
colors_pie = ['#27ae60', '#e74c3c']
wedges, texts, autotexts = axes[1, 1].pie(counts, labels=categories,
colors=colors_pie, autopct='%1.1f%%',
startangle=90, explode=(0, 0.1),
textprops={'fontsize': 11, 'fontweight': 'bold'})
axes[1, 1].set_title(f'Security Incident Summary\n({anomaly_count} Anomalies Detected)',
fontsize=13, fontweight='bold')
# Add summary text
summary_text = f'Total Days Analyzed: {len(daily_records)}\n'
summary_text += f'Normal Days: {normal_count}\n'
summary_text += f'Anomalous Days: {anomaly_count}\n'
if anomaly_count > 0:
summary_text += f'\n⚠️ INVESTIGATION REQUIRED'
else:
summary_text += f'\n✅ NO THREATS DETECTED'
axes[1, 1].text(0, -1.4, summary_text, fontsize=10, ha='center',
bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))
plt.tight_layout()
plt.savefig('ndpr_batch3_dpia_breach_analysis.png', dpi=300, bbox_inches='tight')
plt.show()
print("✓ Chart saved: ndpr_batch3_dpia_breach_analysis.png")
# ===========================================================================
# CHART 5: COMPLIANCE SCORECARD
# ===========================================================================
fig, ax = plt.subplots(figsize=(12, 8))
# Define compliance metrics
compliance_metrics = [
'PII Detection',
'Data Minimization',
'Retention Policy',
'Access Control',
'Encryption at Rest',
'Encryption in Transit',
'User Consent',
'DPIA Completed',
'Breach Response Plan',
'Audit Logging'
]
# Simulate compliance scores (in real scenario, these would be actual checks)
# For this analysis, we'll mark what can be verified from data
compliance_status = []
compliance_colors = []
# PII Detection - can verify
pii_found = False # Based on earlier analysis
compliance_status.append('PASS' if not pii_found else 'REVIEW NEEDED')
compliance_colors.append('#27ae60' if not pii_found else '#f39c12')
# Data Minimization - can verify
minimal_columns = len(df.columns) <= 5
compliance_status.append('PASS' if minimal_columns else 'REVIEW NEEDED')
compliance_colors.append('#27ae60' if minimal_columns else '#f39c12')
# Retention Policy - can verify
retention_ok = data_age <= 365
compliance_status.append('PASS' if retention_ok else 'FAIL')
compliance_colors.append('#27ae60' if retention_ok else '#e74c3c')
# Others require manual verification
manual_checks = ['MANUAL CHECK'] * 7
compliance_status.extend(manual_checks)
compliance_colors.extend(['#95a5a6'] * 7)
# Create horizontal bar chart
y_pos = np.arange(len(compliance_metrics))
ax.barh(y_pos, [1]*len(compliance_metrics), color=compliance_colors,
alpha=0.8, edgecolor='black')
ax.set_yticks(y_pos)
ax.set_yticklabels(compliance_metrics, fontsize=11)
ax.set_xlim(0, 1.5)
ax.set_xticks([])
ax.set_title('NDPR Compliance Scorecard', fontsize=15, fontweight='bold', pad=20)
# Add status labels
for i, (metric, status, color) in enumerate(zip(compliance_metrics, compliance_status, compliance_colors)):
ax.text(1.05, i, status, va='center', fontsize=10, fontweight='bold',
bbox=dict(boxstyle='round', facecolor='white', edgecolor=color, linewidth=2))
# Add legend
from matplotlib.patches import Patch
legend_elements = [
Patch(facecolor='#27ae60', label='PASS', alpha=0.8),
Patch(facecolor='#f39c12', label='REVIEW NEEDED', alpha=0.8),
Patch(facecolor='#e74c3c', label='FAIL', alpha=0.8),
Patch(facecolor='#95a5a6', label='MANUAL CHECK REQUIRED', alpha=0.8)
]
ax.legend(handles=legend_elements, loc='upper right', fontsize=10)
# Add summary box
pass_count = compliance_status.count('PASS')
fail_count = compliance_status.count('FAIL')
review_count = compliance_status.count('REVIEW NEEDED')
manual_count = compliance_status.count('MANUAL CHECK')
summary = f'Compliance Summary:\n'
summary += f'✓ Pass: {pass_count}/{len(compliance_metrics)}\n'
summary += f'⚠ Review: {review_count}/{len(compliance_metrics)}\n'
summary += f'✗ Fail: {fail_count}/{len(compliance_metrics)}\n'
summary += f'⊙ Manual: {manual_count}/{len(compliance_metrics)}'
ax.text(0.02, len(compliance_metrics) - 1, summary, fontsize=10,
bbox=dict(boxstyle='round', facecolor='lightyellow', alpha=0.8),
verticalalignment='top')
plt.tight_layout()
plt.savefig('ndpr_batch3_compliance_scorecard.png', dpi=300, bbox_inches='tight')
plt.show()
print("✓ Chart saved: ndpr_batch3_compliance_scorecard.png")
print("\n" + "="*80)
print("NDPR BATCH 3 CHARTS COMPLETE")
print("="*80)
print(f"\nGenerated Charts:")
print(f" 1. ndpr_batch3_dpia_breach_analysis.png")
print(f" - DPIA risk assessment by factor")
print(f" - Overall risk gauge")
print(f" - Breach detection with anomalies")
print(f" - Security incident summary")
print(f"\n 2. ndpr_batch3_compliance_scorecard.png")
print(f" - Complete NDPR compliance checklist")
print(f" - Automated and manual verification status")
✓ Chart saved: ndpr_batch3_dpia_breach_analysis.png
✓ Chart saved: ndpr_batch3_compliance_scorecard.png
================================================================================
NDPR BATCH 3 CHARTS COMPLETE
================================================================================
Generated Charts:
1. ndpr_batch3_dpia_breach_analysis.png
- DPIA risk assessment by factor
- Overall risk gauge
- Breach detection with anomalies
- Security incident summary
2. ndpr_batch3_compliance_scorecard.png
- Complete NDPR compliance checklist
- Automated and manual verification status